From: Jonathan Wakely <jwakely@redhat.com>
To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org
Subject: [committed] libstdc++: Avoid unwanted allocations in filesystem::path
Date: Wed, 1 Dec 2021 15:08:07 +0000 [thread overview]
Message-ID: <20211201150807.217431-1-jwakely@redhat.com> (raw)
Tested x86_64-linux, pushed to trunk.
When using COW strings, accessing _M_pathname[0] and similar non-const
accessors can cause the string to "leak", meaning it reallocates itself
if it shares ownership with another string object.
This causes test failures for --enable-fully-dynamic-string builds:
/home/jwakely/src/gcc/libstdc++-v3/testsuite/experimental/filesystem/path/construct/90634.cc:62: void test01(): Assertion 'bytes_allocated == 0' failed.
FAIL: experimental/filesystem/path/construct/90634.cc execution test
This FAIL happens because the fully-dynamic move constructor results in
shared ownership, so for path(std::move(std::string("foo"))) the
_M_pathname member shares ownership with the temporary, and the
non-const accesses in _M_split_cmpts() cause a new copy of the string to
be allocated. This un-sharing is wasteful, and entirely unnecessary when
sharing ownership with an rvalue that is about to release its ownership
anyway. Even for lvalues, sharing ownership is not a problem and
reallocating a unique copy of the string is wasteful.
This removes non-const accesses of _M_pathname in the
path::_M_split_cmpts() members.
libstdc++-v3/ChangeLog:
* src/c++17/fs_path.cc (path::_M_split_cmpts()): Remove
micro-optimization for "/" path.
* src/filesystem/path.cc (path::_M_split_cmpts()): Only access
the contents of _M_pathname using const member functions.
---
libstdc++-v3/src/c++17/fs_path.cc | 5 -----
libstdc++-v3/src/filesystem/path.cc | 31 ++++++++++++++++-------------
2 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/libstdc++-v3/src/c++17/fs_path.cc b/libstdc++-v3/src/c++17/fs_path.cc
index 506ff25f9a6..967d9471134 100644
--- a/libstdc++-v3/src/c++17/fs_path.cc
+++ b/libstdc++-v3/src/c++17/fs_path.cc
@@ -1872,11 +1872,6 @@ path::_M_split_cmpts()
_M_cmpts.type(_Type::_Filename);
return;
}
- if (_M_pathname.length() == 1 && _M_pathname[0] == preferred_separator)
- {
- _M_cmpts.type(_Type::_Root_dir);
- return;
- }
_Parser parser(_M_pathname);
diff --git a/libstdc++-v3/src/filesystem/path.cc b/libstdc++-v3/src/filesystem/path.cc
index a935573740f..8e8806a953f 100644
--- a/libstdc++-v3/src/filesystem/path.cc
+++ b/libstdc++-v3/src/filesystem/path.cc
@@ -337,15 +337,18 @@ path::_M_split_cmpts()
_M_type = _Type::_Multi;
_M_cmpts.clear();
- if (_M_pathname.empty())
+ // Use const-reference to access _M_pathname, to avoid "leaking" COW string.
+ const auto& pathname = _M_pathname;
+
+ if (pathname.empty())
return;
{
// Approximate count of components, to reserve space in _M_cmpts vector:
int count = 1;
- bool saw_sep_last = _S_is_dir_sep(_M_pathname[0]);
+ bool saw_sep_last = _S_is_dir_sep(pathname[0]);
bool saw_non_sep = !saw_sep_last;
- for (value_type c : _M_pathname)
+ for (value_type c : pathname)
{
if (_S_is_dir_sep(c))
saw_sep_last = true;
@@ -363,13 +366,13 @@ path::_M_split_cmpts()
}
size_t pos = 0;
- const size_t len = _M_pathname.size();
+ const size_t len = pathname.size();
// look for root name or root directory
- if (_S_is_dir_sep(_M_pathname[0]))
+ if (_S_is_dir_sep(pathname[0]))
{
// look for root name, such as "//" or "//foo"
- if (len > 1 && _M_pathname[1] == _M_pathname[0])
+ if (len > 1 && pathname[1] == pathname[0])
{
if (len == 2)
{
@@ -378,11 +381,11 @@ path::_M_split_cmpts()
return;
}
- if (!_S_is_dir_sep(_M_pathname[2]))
+ if (!_S_is_dir_sep(pathname[2]))
{
// got root name, find its end
pos = 3;
- while (pos < len && !_S_is_dir_sep(_M_pathname[pos]))
+ while (pos < len && !_S_is_dir_sep(pathname[pos]))
++pos;
if (pos == len)
{
@@ -409,7 +412,7 @@ path::_M_split_cmpts()
++pos;
}
#ifdef _GLIBCXX_FILESYSTEM_IS_WINDOWS
- else if (len > 1 && _M_pathname[1] == L':')
+ else if (len > 1 && pathname[1] == L':')
{
// got disk designator
if (len == 2)
@@ -418,7 +421,7 @@ path::_M_split_cmpts()
return;
}
_M_add_root_name(2);
- if (len > 2 && _S_is_dir_sep(_M_pathname[2]))
+ if (len > 2 && _S_is_dir_sep(pathname[2]))
_M_add_root_dir(2);
pos = 2;
}
@@ -426,9 +429,9 @@ path::_M_split_cmpts()
else
{
size_t n = 1;
- for (; n < _M_pathname.size() && !_S_is_dir_sep(_M_pathname[n]); ++n)
+ for (; n < pathname.size() && !_S_is_dir_sep(pathname[n]); ++n)
{ }
- if (n == _M_pathname.size())
+ if (n == pathname.size())
{
_M_type = _Type::_Filename;
return;
@@ -438,7 +441,7 @@ path::_M_split_cmpts()
size_t back = pos;
while (pos < len)
{
- if (_S_is_dir_sep(_M_pathname[pos]))
+ if (_S_is_dir_sep(pathname[pos]))
{
if (back != pos)
_M_add_filename(back, pos - back);
@@ -450,7 +453,7 @@ path::_M_split_cmpts()
if (back != pos)
_M_add_filename(back, pos - back);
- else if (_S_is_dir_sep(_M_pathname.back()))
+ else if (_S_is_dir_sep(pathname.back()))
{
// [path.itr]/8
// "Dot, if one or more trailing non-root slash characters are present."
--
2.31.1
reply other threads:[~2021-12-01 15:08 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211201150807.217431-1-jwakely@redhat.com \
--to=jwakely@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=libstdc++@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).