On Thu, Sep 22, 2022, 17:51 Charles-François Natali <cf.natali@gmail.com>
wrote:

>
> On Mon, Sep 5, 2022, 23:51 Charles-Francois Natali <cf.natali@gmail.com>
> wrote:
>
>> `basic_filebuf::xsputn` would bypass the buffer when passed a chunk of
>> size 1024 and above, seemingly as an optimisation.
>>
>> This can have a significant performance impact if the overhead of a
>> `write` syscall is non-negligible, e.g. on a slow disk, on network
>> filesystems, or simply during IO contention because instead of flushing
>> every `BUFSIZ` (by default), we can flush every 1024 char.
>> The impact is even greater with custom larger buffers, e.g. for network
>> filesystems, because the code could issue `write` for example 1000X more
>> often than necessary with respect to the buffer size.
>> It also introduces a significant discontinuity in performance when
>> writing chunks of size 1024 and above.
>>
>> See this reproducer which writes down a fixed number of chunks to a file
>> open with `O_SYNC` - to replicate high-latency `write` - for varying
>> size of chunks:
>>
>> ```
>> $ cat test_fstream_flush.cpp
>>
>> int
>> main(int argc, char* argv[])
>> {
>>   assert(argc == 3);
>>
>>   const auto* path = argv[1];
>>   const auto chunk_size = std::stoul(argv[2]);
>>
>>   const auto fd =
>>     open(path, O_CREAT | O_TRUNC | O_WRONLY | O_SYNC | O_CLOEXEC, 0666);
>>   assert(fd >= 0);
>>
>>   auto filebuf = __gnu_cxx::stdio_filebuf<char>(fd, std::ios_base::out);
>>   auto stream = std::ostream(&filebuf);
>>
>>   const auto chunk = std::vector<char>(chunk_size);
>>
>>   for (auto i = 0; i < 1'000; ++i) {
>>     stream.write(chunk.data(), chunk.size());
>>   }
>>
>>   return 0;
>> }
>> ```
>>
>> ```
>> $ g++ -o /tmp/test_fstream_flush test_fstream_flush.cpp -std=c++17
>> $ for i in $(seq 1021 1025); do echo -e "\n$i"; time
>> /tmp/test_fstream_flush /tmp/foo $i; done
>>
>> 1021
>>
>> real    0m0.997s
>> user    0m0.000s
>> sys     0m0.038s
>>
>> 1022
>>
>> real    0m0.939s
>> user    0m0.005s
>> sys     0m0.032s
>>
>> 1023
>>
>> real    0m0.954s
>> user    0m0.005s
>> sys     0m0.034s
>>
>> 1024
>>
>> real    0m7.102s
>> user    0m0.040s
>> sys     0m0.192s
>>
>> 1025
>>
>> real    0m7.204s
>> user    0m0.025s
>> sys     0m0.209s
>> ```
>>
>> See the huge drop in performance at the 1024-boundary.
>>
>> An `strace` confirms that from size 1024 we effectively defeat
>> buffering:
>> 1023-sized writes
>> ```
>> $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush
>> /tmp/foo 1023 2>&1 | head -n5
>> openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC,
>> 0666) = 3
>> writev(3,
>> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=8184},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1023}], 2) = 9207
>> writev(3,
>> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=8184},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1023}], 2) = 9207
>> writev(3,
>> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=8184},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1023}], 2) = 9207
>> writev(3,
>> [{iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=8184},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1023}], 2) = 9207
>> ```
>>
>> vs 1024-sized writes
>> ```
>> $ strace -P /tmp/foo -e openat,write,writev /tmp/test_fstream_flush
>> /tmp/foo 1024 2>&1 | head -n5
>> openat(AT_FDCWD, "/tmp/foo", O_WRONLY|O_CREAT|O_TRUNC|O_SYNC|O_CLOEXEC,
>> 0666) = 3
>> writev(3, [{iov_base=NULL, iov_len=0},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1024}], 2) = 1024
>> writev(3, [{iov_base="", iov_len=0},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1024}], 2) = 1024
>> writev(3, [{iov_base="", iov_len=0},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1024}], 2) = 1024
>> writev(3, [{iov_base="", iov_len=0},
>> {iov_base="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"...,
>> iov_len=1024}], 2) = 1024
>> ```
>>
>> Instead, it makes sense to only bypass the buffer if the amount of data
>> to be written is larger than the buffer capacity.
>>
>> Closes https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63746
>> ---
>>  libstdc++-v3/include/bits/fstream.tcc         |  9 +--
>>  .../27_io/basic_filebuf/sputn/char/63746.cc   | 55 +++++++++++++++++++
>>  2 files changed, 58 insertions(+), 6 deletions(-)
>>  create mode 100644
>> libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc
>>
>> diff --git a/libstdc++-v3/include/bits/fstream.tcc
>> b/libstdc++-v3/include/bits/fstream.tcc
>> index 7ccc887b8..2e9369628 100644
>> --- a/libstdc++-v3/include/bits/fstream.tcc
>> +++ b/libstdc++-v3/include/bits/fstream.tcc
>> @@ -757,23 +757,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>      {
>>        streamsize __ret = 0;
>>        // Optimization in the always_noconv() case, to be generalized in
>> the
>> -      // future: when __n is sufficiently large we write directly
>> instead of
>> -      // using the buffer.
>> +      // future: when __n is larger than the available capacity we write
>> +      // directly instead of using the buffer.
>>        const bool __testout = (_M_mode & ios_base::out
>>                               || _M_mode & ios_base::app);
>>        if (__check_facet(_M_codecvt).always_noconv()
>>           && __testout && !_M_reading)
>>         {
>> -         // Measurement would reveal the best choice.
>> -         const streamsize __chunk = 1ul << 10;
>>           streamsize __bufavail = this->epptr() - this->pptr();
>>
>>           // Don't mistake 'uncommitted' mode buffered with unbuffered.
>>           if (!_M_writing && _M_buf_size > 1)
>>             __bufavail = _M_buf_size - 1;
>>
>> -         const streamsize __limit = std::min(__chunk, __bufavail);
>> -         if (__n >= __limit)
>> +         if (__n >= __bufavail)
>>             {
>>               const streamsize __buffill = this->pptr() - this->pbase();
>>               const char* __buf = reinterpret_cast<const
>> char*>(this->pbase());
>> diff --git
>> a/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc
>> b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc
>> new file mode 100644
>> index 000000000..36448e049
>> --- /dev/null
>> +++ b/libstdc++-v3/testsuite/27_io/basic_filebuf/sputn/char/63746.cc
>> @@ -0,0 +1,55 @@
>> +// Copyright (C) 2013-2022 Free Software Foundation, Inc.
>> +//
>> +// This file is part of the GNU ISO C++ Library.  This library is free
>> +// software; you can redistribute it and/or modify it under the
>> +// terms of the GNU General Public License as published by the
>> +// Free Software Foundation; either version 3, or (at your option)
>> +// any later version.
>> +
>> +// This library is distributed in the hope that it will be useful,
>> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +// GNU General Public License for more details.
>> +
>> +// You should have received a copy of the GNU General Public License
>> along
>> +// with this library; see the file COPYING3.  If not see
>> +// <http://www.gnu.org/licenses/>.
>> +
>> +// { dg-require-fileio "" }
>> +
>> +#include <fstream>
>> +#include <testsuite_hooks.h>
>> +
>> +class testbuf : public std::filebuf {
>> +public:
>> +  char_type* pub_pprt() const
>> +  {
>> +    return this->pptr();
>> +  }
>> +
>> +  char_type* pub_pbase() const
>> +  {
>> +    return this->pbase();
>> +  }
>> +};
>> +
>> +void test01()
>> +{
>> +  using namespace std;
>> +
>> +  // Leave capacity to avoid flush.
>> +  const streamsize chunk_size = BUFSIZ - 1 - 1;
>> +  const char data[chunk_size] = {};
>> +
>> +  testbuf a_f;
>> +  VERIFY( a_f.open("tmp_63746_sputn", ios_base::out) );
>> +  VERIFY( chunk_size == a_f.sputn(data, chunk_size) );
>> +  VERIFY( (a_f.pub_pprt() - a_f.pub_pbase()) == chunk_size );
>> +  VERIFY( a_f.close() );
>> +}
>> +
>> +int main()
>> +{
>> +  test01();
>> +  return 0;
>> +}
>> --
>> 2.30.2
>>
>>