From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id ED01B385DC3B for ; Thu, 17 Aug 2023 20:31:02 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org ED01B385DC3B Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=redhat.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=redhat.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1692304262; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=aeiKPbrs9FvhYqKwHv1o9d2LEd4aVlRmmu0aMjBi7LQ=; b=HOc5nf41NiA8/OlSIP1sEGljjo/ecFSbc8WeXWSX8y9CutZgoLDOSil8ZKtceUfpvpPHyH 359Bnw0p3uKJQBAwAeprmIq8HxWdDaQNz9l3mBB4RKqZ0MfsgUxhxGMoYeyLyikV8NGagU xxlLxRPT3tg1NLIl7YlTaUPjrqUJ8D0= Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-615-7XM1hjutMX-jDppRpBHF4w-1; Thu, 17 Aug 2023 16:31:00 -0400 X-MC-Unique: 7XM1hjutMX-jDppRpBHF4w-1 Received: from smtp.corp.redhat.com (int-mx07.intmail.prod.int.rdu2.redhat.com [10.11.54.7]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 4CC29101A528; Thu, 17 Aug 2023 20:31:00 +0000 (UTC) Received: from localhost (unknown [10.42.28.201]) by smtp.corp.redhat.com (Postfix) with ESMTP id 14C9A140E950; Thu, 17 Aug 2023 20:30:59 +0000 (UTC) From: Jonathan Wakely To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Subject: [committed] libstdc++: Optimize std::to_string using std::string::resize_and_overwrite Date: Thu, 17 Aug 2023 21:30:53 +0100 Message-ID: <20230817203100.1131311-1-jwakely@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.1 on 10.11.54.7 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.8 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H4,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Tested x86_64-linux. Pushed to trunk. -- >8 -- This uses std::string::__resize_and_overwrite to avoid initializing the string buffer with characters that are immediately overwritten. This results in about 6% better performance for the std_to_string case in int-benchmark.cc from https://github.com/fmtlib/format-benchmark This requires a change to a testcase. The previous implementation guaranteed that the string returned from std::to_string(integral-type) would have no excess capacity, because it was constructed with the correct length. The new implementation constructs an empty string and then resizes it with resize_and_overwrite, which over-allocates. This means that the "no-excess capacity" guarantee no longer holds. We can also greatly improve the performance of std::to_wstring by using std::to_string and then widening it with a new helper function, instead of using std::swprintf to do the formatting. libstdc++-v3/ChangeLog: * include/bits/basic_string.h (to_string(integral-type)): Use resize_and_overwrite when available. (__to_wstring_numeric): New helper functions. (to_wstring): Use std::to_string then __to_wstring_numeric. * testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc: Remove check for no excess capacity. --- libstdc++-v3/include/bits/basic_string.h | 173 +++++++++++++----- .../numeric_conversions/char/to_string_int.cc | 2 - 2 files changed, 123 insertions(+), 52 deletions(-) diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index e6f94640150..46326d02597 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -4197,8 +4197,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const bool __neg = __val < 0; const unsigned __uval = __neg ? (unsigned)~__val + 1u : __val; const auto __len = __detail::__to_chars_len(__uval); - string __str(__neg + __len, '-'); - __detail::__to_chars_10_impl(&__str[__neg], __len, __uval); + string __str; + __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) { + __p[0] = '-'; + __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval); + return __n; + }); return __str; } @@ -4209,8 +4213,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 noexcept // any 32-bit value fits in the SSO buffer #endif { - string __str(__detail::__to_chars_len(__val), '\0'); - __detail::__to_chars_10_impl(&__str[0], __str.size(), __val); + const auto __len = __detail::__to_chars_len(__val); + string __str; + __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) { + __detail::__to_chars_10_impl(__p, __n, __val); + return __n; + }); return __str; } @@ -4224,8 +4232,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const bool __neg = __val < 0; const unsigned long __uval = __neg ? (unsigned long)~__val + 1ul : __val; const auto __len = __detail::__to_chars_len(__uval); - string __str(__neg + __len, '-'); - __detail::__to_chars_10_impl(&__str[__neg], __len, __uval); + string __str; + __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) { + __p[0] = '-'; + __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval); + return __n; + }); return __str; } @@ -4236,8 +4248,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 noexcept // any 32-bit value fits in the SSO buffer #endif { - string __str(__detail::__to_chars_len(__val), '\0'); - __detail::__to_chars_10_impl(&__str[0], __str.size(), __val); + const auto __len = __detail::__to_chars_len(__val); + string __str; + __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) { + __detail::__to_chars_10_impl(__p, __n, __val); + return __n; + }); return __str; } @@ -4249,8 +4265,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 const unsigned long long __uval = __neg ? (unsigned long long)~__val + 1ull : __val; const auto __len = __detail::__to_chars_len(__uval); - string __str(__neg + __len, '-'); - __detail::__to_chars_10_impl(&__str[__neg], __len, __uval); + string __str; + __str.__resize_and_overwrite(__neg + __len, [=](char* __p, size_t __n) { + __p[0] = '-'; + __detail::__to_chars_10_impl(__p + (int)__neg, __len, __uval); + return __n; + }); return __str; } @@ -4258,8 +4278,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 inline string to_string(unsigned long long __val) { - string __str(__detail::__to_chars_len(__val), '\0'); - __detail::__to_chars_10_impl(&__str[0], __str.size(), __val); + const auto __len = __detail::__to_chars_len(__val); + string __str; + __str.__resize_and_overwrite(__len, [__val](char* __p, size_t __n) { + __detail::__to_chars_10_impl(__p, __n, __val); + return __n; + }); return __str; } @@ -4335,80 +4359,129 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 inline long double stold(const wstring& __str, size_t* __idx = 0) { return __gnu_cxx::__stoa(&std::wcstold, "stold", __str.c_str(), __idx); } +#endif + +#ifdef _GLIBCXX_USE_WCHAR_T +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++17-extensions" + _GLIBCXX20_CONSTEXPR + inline void + __to_wstring_numeric(const char* __s, int __len, wchar_t* __wout) + { + // This condition is true if exec-charset and wide-exec-charset share the + // same values for the ASCII subset or the EBCDIC invariant character set. + if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-' + && wchar_t('.') == L'.' && wchar_t('e') == L'e') + { + for (int __i = 0; __i < __len; ++__i) + __wout[__i] = (wchar_t) __s[__i]; + } + else + { + wchar_t __wc[256]; + for (int __i = '0'; __i <= '9'; ++__i) + __wc[__i] = L'0' + __i; + __wc['.'] = L'.'; + __wc['+'] = L'+'; + __wc['-'] = L'-'; + __wc['a'] = L'a'; + __wc['b'] = L'b'; + __wc['c'] = L'c'; + __wc['d'] = L'd'; + __wc['e'] = L'e'; + __wc['f'] = L'f'; + __wc['n'] = L'n'; // for "nan" and "inf" + __wc['p'] = L'p'; // for hexfloats "0x1p1" + __wc['x'] = L'x'; + __wc['A'] = L'A'; + __wc['B'] = L'B'; + __wc['C'] = L'C'; + __wc['D'] = L'D'; + __wc['E'] = L'E'; + __wc['F'] = L'F'; + __wc['N'] = L'N'; + __wc['P'] = L'P'; + __wc['X'] = L'X'; + + for (int __i = 0; __i < __len; ++__i) + __wout[__i] = __wc[(int)__s[__i]]; + } + } + +#if __cpp_lib_constexpr_string >= 201907L + constexpr +#endif + inline wstring +#if __cplusplus >= 201703L + __to_wstring_numeric(string_view __s) +#else + __to_wstring_numeric(const string& __s) +#endif + { + if constexpr (wchar_t('0') == L'0' && wchar_t('-') == L'-' + && wchar_t('.') == L'.' && wchar_t('e') == L'e') + return wstring(__s.data(), __s.data() + __s.size()); + else + { + wstring __ws; + auto __f = __s.data(); + __ws.__resize_and_overwrite(__s.size(), + [__f] (wchar_t* __to, int __n) { + std::__to_wstring_numeric(__f, __n, __to); + return __n; + }); + return __ws; + } + } +#pragma GCC diagnostic pop -#ifndef _GLIBCXX_HAVE_BROKEN_VSWPRINTF - // DR 1261. _GLIBCXX_NODISCARD inline wstring to_wstring(int __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, 4 * sizeof(int), - L"%d", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(unsigned __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, - 4 * sizeof(unsigned), - L"%u", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(long __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, 4 * sizeof(long), - L"%ld", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(unsigned long __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, - 4 * sizeof(unsigned long), - L"%lu", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(long long __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, - 4 * sizeof(long long), - L"%lld", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(unsigned long long __val) - { return __gnu_cxx::__to_xstring(&std::vswprintf, - 4 * sizeof(unsigned long long), - L"%llu", __val); } + { return std::__to_wstring_numeric(std::to_string(__val)); } +#if _GLIBCXX_USE_C99_STDIO _GLIBCXX_NODISCARD inline wstring to_wstring(float __val) - { - const int __n = - __gnu_cxx::__numeric_traits::__max_exponent10 + 20; - return __gnu_cxx::__to_xstring(&std::vswprintf, __n, - L"%f", __val); - } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(double __val) - { - const int __n = - __gnu_cxx::__numeric_traits::__max_exponent10 + 20; - return __gnu_cxx::__to_xstring(&std::vswprintf, __n, - L"%f", __val); - } + { return std::__to_wstring_numeric(std::to_string(__val)); } _GLIBCXX_NODISCARD inline wstring to_wstring(long double __val) - { - const int __n = - __gnu_cxx::__numeric_traits::__max_exponent10 + 20; - return __gnu_cxx::__to_xstring(&std::vswprintf, __n, - L"%Lf", __val); - } -#endif // _GLIBCXX_HAVE_BROKEN_VSWPRINTF -#endif // _GLIBCXX_USE_WCHAR_T && _GLIBCXX_USE_C99_WCHAR + { return std::__to_wstring_numeric(std::to_string(__val)); } +#endif +#endif // _GLIBCXX_USE_WCHAR_T _GLIBCXX_END_NAMESPACE_CXX11 _GLIBCXX_END_NAMESPACE_VERSION diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc index 53e6e81f93c..eae983cebb4 100644 --- a/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc +++ b/libstdc++-v3/testsuite/21_strings/basic_string/numeric_conversions/char/to_string_int.cc @@ -104,8 +104,6 @@ template const std::string expected = test::to_string(val); VERIFY( s == expected ); VERIFY( s[s.size()] == '\0' ); // null-terminator not overwritten! - if (s.size() > empty_string_capacity) - VERIFY( s.capacity() == s.size() ); // GNU-specific guarantee } #ifdef SIMULATOR_TEST -- 2.41.0