public inbox for libstdc++@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] libstdc++: Optimize std::to_string using std::string::resize_and_overwrite
@ 2023-08-17 20:30 Jonathan Wakely
  0 siblings, 0 replies; only message in thread
From: Jonathan Wakely @ 2023-08-17 20:30 UTC (permalink / raw)
  To: libstdc++, gcc-patches

Tested x86_64-linux. Pushed to trunk.

-- >8 --

This uses std::string::__resize_and_overwrite to avoid initializing the
string buffer with characters that are immediately overwritten. This
results in about 6% better performance for the std_to_string case in
int-benchmark.cc (from https://github.com/fmtlib/format-benchmark).

This requires a change to a testcase. The previous implementation
guaranteed that the string returned from std::to_string(integral-type)
would have no excess capacity, because it was constructed with the
correct length. The new implementation constructs an empty string and
then resizes it with resize_and_overwrite, which over-allocates. This
means that the "no excess capacity" guarantee no longer holds.

We can also greatly improve the performance of std::to_wstring by using
std::to_string and then widening it with a new helper function, instead
of using std::swprintf to do the formatting.

libstdc++-v3/ChangeLog:

	* include/bits/basic_string.h (to_string(integral-type)): Use
	resize_and_overwrite when available.
	(__to_wstring_numeric): New helper functions.
	(to_wstring): Use std::to_string then __to_wstring_numeric.
	* testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc:
	Remove check for no excess capacity.
---
 libstdc++-v3/include/bits/basic_string.h      | 173 +++++++++++++-----
 .../numeric_conversions/char/to_string_int.cc |   2 -
 2 files changed, 123 insertions(+), 52 deletions(-)

diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index e6f94640150..46326d02597 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -4197,8 +4197,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
     const bool __neg = __val < 0;
     const unsigned __uval = __neg ? (unsigned)~__val + 1u : __val;
     const auto __len = __detail::__to_chars_len(__uval);
-    string __str(__neg + __len, '-');
-    __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+    string __str;
+    __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+      __p[0] = '-';
+      __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+      return __n;
+    });
     return __str;
   }
 
@@ -4209,8 +4213,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   noexcept // any 32-bit value fits in the SSO buffer
 #endif
   {
-    string __str(__detail::__to_chars_len(__val), '\0');
-    __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+    const auto __len = __detail::__to_chars_len(__val);
+    string __str;
+    __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+      __detail::__to_chars_10_impl(__p, __n, __val);
+      return __n;
+    });
     return __str;
   }
 
@@ -4224,8 +4232,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
     const bool __neg = __val < 0;
     const unsigned long __uval = __neg ? (unsigned long)~__val + 1ul : __val;
     const auto __len = __detail::__to_chars_len(__uval);
-    string __str(__neg + __len, '-');
-    __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+    string __str;
+    __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+      __p[0] = '-';
+      __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+      return __n;
+    });
     return __str;
   }
 
@@ -4236,8 +4248,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   noexcept // any 32-bit value fits in the SSO buffer
 #endif
   {
-    string __str(__detail::__to_chars_len(__val), '\0');
-    __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+    const auto __len = __detail::__to_chars_len(__val);
+    string __str;
+    __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+      __detail::__to_chars_10_impl(__p, __n, __val);
+      return __n;
+    });
     return __str;
   }
 
@@ -4249,8 +4265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
     const unsigned long long __uval
       = __neg ? (unsigned long long)~__val + 1ull : __val;
     const auto __len = __detail::__to_chars_len(__uval);
-    string __str(__neg + __len, '-');
-    __detail::__to_chars_10_impl(&__str[__neg], __len, __uval);
+    string __str;
+    __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) {
+      __p[0] = '-';
+      __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval);
+      return __n;
+    });
     return __str;
   }
 
@@ -4258,8 +4278,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   inline string
   to_string(unsigned long long __val)
   {
-    string __str(__detail::__to_chars_len(__val), '\0');
-    __detail::__to_chars_10_impl(&__str[0], __str.size(), __val);
+    const auto __len = __detail::__to_chars_len(__val);
+    string __str;
+    __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) {
+      __detail::__to_chars_10_impl(__p, __n, __val);
+      return __n;
+    });
     return __str;
   }
 
@@ -4335,80 +4359,129 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   inline long double
   stold(const wstring& __str, size_t* __idx = 0)
   { return __gnu_cxx::__stoa(&std::wcstold, "stold", __str.c_str(), __idx); }
+#endif
+
+#ifdef _GLIBCXX_USE_WCHAR_T
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++17-extensions"
+  _GLIBCXX20_CONSTEXPR
+  inline void
+  __to_wstring_numeric(const char* __s, int __len, wchar_t* __wout)
+  {
+    // This condition is true if exec-charset and wide-exec-charset share the
+    // same values for the ASCII subset or the EBCDIC invariant character set.
+    if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-'
+		    && wchar_t('.') == L'.' && wchar_t('e') == L'e')
+      {
+	for (int __i = 0; __i < __len; ++__i)
+	  __wout[__i] = (wchar_t) __s[__i];
+      }
+    else
+      {
+	wchar_t __wc[256];
+	for (int __i = '0'; __i <= '9'; ++__i)
+	  __wc[__i] = L'0' + __i;
+	__wc['.'] = L'.';
+	__wc['+'] = L'+';
+	__wc['-'] = L'-';
+	__wc['a'] = L'a';
+	__wc['b'] = L'b';
+	__wc['c'] = L'c';
+	__wc['d'] = L'd';
+	__wc['e'] = L'e';
+	__wc['f'] = L'f';
+	__wc['n'] = L'n'; // for "nan" and "inf"
+	__wc['p'] = L'p'; // for hexfloats "0x1p1"
+	__wc['x'] = L'x';
+	__wc['A'] = L'A';
+	__wc['B'] = L'B';
+	__wc['C'] = L'C';
+	__wc['D'] = L'D';
+	__wc['E'] = L'E';
+	__wc['F'] = L'F';
+	__wc['N'] = L'N';
+	__wc['P'] = L'P';
+	__wc['X'] = L'X';
+
+	for (int __i = 0; __i < __len; ++__i)
+	  __wout[__i] = __wc[(int)__s[__i]];
+      }
+  }
+
+#if __cpp_lib_constexpr_string >= 201907L
+  constexpr
+#endif
+  inline wstring
+#if __cplusplus >= 201703L
+  __to_wstring_numeric(string_view __s)
+#else
+  __to_wstring_numeric(const string& __s)
+#endif
+  {
+    if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-'
+		    && wchar_t('.') == L'.' && wchar_t('e') == L'e')
+      return wstring(__s.data(), __s.data() + __s.size());
+    else
+      {
+	wstring __ws;
+	auto __f = __s.data();
+	__ws.__resize_and_overwrite(__s.size(),
+				    [__f] (wchar_t* __to, int __n) {
+				      std::__to_wstring_numeric(__f, __n, __to);
+				      return __n;
+				    });
+	return __ws;
+      }
+  }
+#pragma GCC diagnostic pop
 
-#ifndef _GLIBCXX_HAVE_BROKEN_VSWPRINTF
-  // DR 1261.
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(int __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, 4 * sizeof(int),
-					    L"%d", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(unsigned __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
-					    4 * sizeof(unsigned),
-					    L"%u", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(long __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, 4 * sizeof(long),
-					    L"%ld", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(unsigned long __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
-					    4 * sizeof(unsigned long),
-					    L"%lu", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(long long __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
-					    4 * sizeof(long long),
-					    L"%lld", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(unsigned long long __val)
-  { return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf,
-					    4 * sizeof(unsigned long long),
-					    L"%llu", __val); }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
+#if _GLIBCXX_USE_C99_STDIO
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(float __val)
-  {
-    const int __n =
-      __gnu_cxx::__numeric_traits<float>::__max_exponent10 + 20;
-    return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
-					    L"%f", __val);
-  }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(double __val)
-  {
-    const int __n =
-      __gnu_cxx::__numeric_traits<double>::__max_exponent10 + 20;
-    return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
-					    L"%f", __val);
-  }
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
 
   _GLIBCXX_NODISCARD
   inline wstring
   to_wstring(long double __val)
-  {
-    const int __n =
-      __gnu_cxx::__numeric_traits<long double>::__max_exponent10 + 20;
-    return __gnu_cxx::__to_xstring<wstring>(&std::vswprintf, __n,
-					    L"%Lf", __val);
-  }
-#endif // _GLIBCXX_HAVE_BROKEN_VSWPRINTF
-#endif // _GLIBCXX_USE_WCHAR_T && _GLIBCXX_USE_C99_WCHAR
+  { return std::__to_wstring_numeric(std::to_string(__val)); }
+#endif
+#endif // _GLIBCXX_USE_WCHAR_T
 
 _GLIBCXX_END_NAMESPACE_CXX11
 _GLIBCXX_END_NAMESPACE_VERSION
diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
index 53e6e81f93c..eae983cebb4 100644
--- a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc
@@ -104,8 +104,6 @@ template<typename T>
     const std::string expected = test::to_string(val);
     VERIFY( s == expected );
     VERIFY( s[s.size()] == '\0' ); // null-terminator not overwritten!
-    if (s.size() > empty_string_capacity)
-      VERIFY( s.capacity() == s.size() ); // GNU-specific guarantee
   }
 
 #ifdef SIMULATOR_TEST
-- 
2.41.0


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-08-17 20:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-17 20:30 [committed] libstdc++: Optimize std::to_string using std::string::resize_and_overwrite Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).