* [committed] libstdc++: Optimize std::to_string using std::string::resize_and_overwrite
@ 2023-08-17 20:30 Jonathan Wakely
0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2023-08-17 20:30 UTC (permalink / raw)
To: libstdc++, gcc-patches
Tested x86_64-linux. Pushed to trunk.
-- >8 --
This uses std::string::__resize_and_overwrite to avoid initializing the
string buffer with characters that are immediately overwritten. This
results in about 6% better performance for the std_to_string case in
int-benchmark.cc (from https://github.com/fmtlib/format-benchmark).
This requires a change to a testcase. The previous implementation
guaranteed that the string returned from std::to_string(integral-type)
would have no excess capacity, because it was constructed with the
correct length. The new implementation constructs an empty string and
then resizes it with __resize_and_overwrite, which over-allocates. This
means that the "no excess capacity" guarantee no longer holds.
We can also greatly improve the performance of std::to_wstring by calling
std::to_string and then widening the result with a new helper function,
instead of using std::vswprintf to do the formatting.
libstdc++-v3/ChangeLog:
* include/bits/basic_string.h (to_string(integral-type)): Use
resize_and_overwrite when available.
(__to_wstring_numeric): New helper functions.
(to_wstring): Use std::to_string then __to_wstring_numeric.
* testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc:
Remove check for no excess capacity.
---
libstdc++-v3/include/bits/basic_string.h | 173 +++++++++++++-----
.../numeric_conversions/char/to_string_int.cc | 2 -
2 files changed, 123 insertions(+), 52 deletions(-)
diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index e6f94640150..46326d02597 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -4197,8 +4197,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
const bool __neg = __val < 0;
const unsigned __uval = __neg ? (unsigned)~__val + 1u : __val;
const auto __len = __detail::__to_chars_len(__uval);
- string __str(__neg + __len, '-');
- __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+ string __str;
+ __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+ __p[0] = '-';
+ __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+ return __n;
+ });
return __str;
}
@@ -4209,8 +4213,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
noexcept // any 32-bit value fits in the SSO buffer
#endif
{
- string __str(__detail::__to_chars_len(__val), '\0');
- __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+ const auto __len = __detail::__to_chars_len(__val);
+ string __str;
+ __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+ __detail::__to_chars_10_impl(__p, __n, __val);
+ return __n;
+ });
return __str;
}
@@ -4224,8 +4232,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
const bool __neg = __val < 0;
const unsigned long __uval = __neg ? (unsigned long)~__val + 1ul : __val;
const auto __len = __detail::__to_chars_len(__uval);
- string __str(__neg + __len, '-');
- __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+ string __str;
+ __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+ __p[0] = '-';
+ __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+ return __n;
+ });
return __str;
}
@@ -4236,8 +4248,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
noexcept // any 32-bit value fits in the SSO buffer
#endif
{
- string __str(__detail::__to_chars_len(__val), '\0');
- __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+ const auto __len = __detail::__to_chars_len(__val);
+ string __str;
+ __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+ __detail::__to_chars_10_impl(__p, __n, __val);
+ return __n;
+ });
return __str;
}
@@ -4249,8 +4265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
const unsigned long long __uval
= __neg ? (unsigned long long)~__val + 1ull : __val;
const auto __len = __detail::__to_chars_len(__uval);
- string __str(__neg + __len, '-');
- __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+ string __str;
+ __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+ __p[0] = '-';
+ __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+ return __n;
+ });
return __str;
}
@@ -4258,8 +4278,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
inline string
to_string(unsigned long long __val)
{
- string __str(__detail::__to_chars_len(__val), '\0');
- __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+ const auto __len = __detail::__to_chars_len(__val);
+ string __str;
+ __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+ __detail::__to_chars_10_impl(__p, __n, __val);
+ return __n;
+ });
return __str;
}
@@ -4335,80 +4359,129 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
inline long double
stold(const wstring& __str, size_t* __idx = 0)
{ return __gnu_cxx::__stoa(&std::wcstold, "stold", __str.c_str(), __idx); }
+#endif
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions"
+ _GLIBCXX20_CONSTEXPR
+ inline void
+ __to_wstring_numeric(const char* __s, int __len, wchar_t* __wout)
+ {
+ // This condition is true if exec-charset and wide-exec-charset share the
+ // same values for the ASCII subset or the EBCDIC invariant character set.
+ if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-'
+ && wchar_t('.') == L'.' && wchar_t('e') == L'e')
+ {
+ for (int __i = 0; __i < __len; ++__i)
+ __wout[__i] = (wchar_t) __s[__i];
+ }
+ else
+ {
+ wchar_t __wc[256];
+ for (int __i = '0'; __i <= '9'; ++__i)
+ __wc[__i] = L'0' + __i;
+ __wc['.'] = L'.';
+ __wc['+'] = L'+';
+ __wc['-'] = L'-';
+ __wc['a'] = L'a';
+ __wc['b'] = L'b';
+ __wc['c'] = L'c';
+ __wc['d'] = L'd';
+ __wc['e'] = L'e';
+ __wc['f'] = L'f';
+ __wc['n'] = L'n'; // for "nan" and "inf"
+ __wc['p'] = L'p'; // for hexfloats "0x1p1"
+ __wc['x'] = L'x';
+ __wc['A'] = L'A';
+ __wc['B'] = L'B';
+ __wc['C'] = L'C';
+ __wc['D'] = L'D';
+ __wc['E'] = L'E';
+ __wc['F'] = L'F';
+ __wc['N'] = L'N';
+ __wc['P'] = L'P';
+ __wc['X'] = L'X';
+
+ for (int __i = 0; __i < __len; ++__i)
+ __wout[__i] = __wc[(int)__s[__i]];
+ }
+ }
+
+#if __cpp_lib_constexpr_string >= 201907L
+ constexpr
+#endif
+ inline wstring
+#if __cplusplus >= 201703L
+ __to_wstring_numeric(string_view __s)
+#else
+ __to_wstring_numeric(const string& __s)
+#endif
+ {
+ if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-'
+ && wchar_t('.') == L'.' && wchar_t('e') == L'e')
+ return wstring(__s.data(), __s.data() + __s.size());
+ else
+ {
+ wstring __ws;
+ auto __f = __s.data();
+ __ws.__resize_and_overwrite(__s.size(),
+ [__f] (wchar_t* __to, int __n) {
+ std::__to_wstring_numeric(__f, __n, __to);
+ return __n;
+ });
+ return __ws;
+ }
+ }
+#pragma GCC diagnostic pop
-#ifndef _GLIBCXX_HAVE_BROKEN_VSWPRINTF
- // DR 1261.
_GLIBCXX_NODISCARD
inline wstring
to_wstring(int __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, 4 * sizeof(int),
- L"%d", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(unsigned __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
- 4 * sizeof(unsigned),
- L"%u", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(long __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, 4 * sizeof(long),
- L"%ld", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(unsigned long __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
- 4 * sizeof(unsigned long),
- L"%lu", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(long long __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
- 4 * sizeof(long long),
- L"%lld", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(unsigned long long __val)
- { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
- 4 * sizeof(unsigned long long),
- L"%llu", __val); }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
+#if _GLIBCXX_USE_C99_STDIO
_GLIBCXX_NODISCARD
inline wstring
to_wstring(float __val)
- {
- const int __n =
- __gnu_cxx::__numeric_traits<float>::__max_exponent10 + 20;
- return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
- L"%f", __val);
- }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(double __val)
- {
- const int __n =
- __gnu_cxx::__numeric_traits<double>::__max_exponent10 + 20;
- return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
- L"%f", __val);
- }
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
_GLIBCXX_NODISCARD
inline wstring
to_wstring(long double __val)
- {
- const int __n =
- __gnu_cxx::__numeric_traits<long double>::__max_exponent10 + 20;
- return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
- L"%Lf", __val);
- }
-#endif // _GLIBCXX_HAVE_BROKEN_VSWPRINTF
-#endif // _GLIBCXX_USE_WCHAR_T && _GLIBCXX_USE_C99_WCHAR
+ { return std::__to_wstring_numeric(std::to_string(__val)); }
+#endif
+#endif // _GLIBCXX_USE_WCHAR_T
_GLIBCXX_END_NAMESPACE_CXX11
_GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
index 53e6e81f93c..eae983cebb4 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
@@ -104,8 +104,6 @@ template<typename T>
const std::string expected = test::to_string(val);
VERIFY( s == expected );
VERIFY( s[s.size()] == '\0' ); // null-terminator not overwritten!
- if (s.size() > empty_string_capacity)
- VERIFY( s.capacity() == s.size() ); // GNU-specific guarantee
}
#ifdef SIMULATOR_TEST
--
2.41.0
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-08-17 20:31 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-17 20:30 [committed] libstdc++: Optimize std::to_string using std::string::resize_and_overwrite Jonathan Wakely
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).