From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1075) id BA8B23858D35; Tue, 21 Nov 2023 14:17:47 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org BA8B23858D35 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1700576267; bh=WX3QNi62cJu18E3UWJhT1RaNxqsOpZ8OEsN2SgNpiOU=; h=From:To:Subject:Date:From; b=p0cmiVyycJFCY4eDjOJe2Rkqf0YQVD2H36X+AwuGWe6MRxTiqT8EuDiaBAZdyn7Jv hxnrfG+S7urY4STtIlM66NYQoA+6P+Jfcxhx1mf2ljY6ECHMKgH99EvTs7eQKjtvcp DlUZRcFDmY2LLAEpFO00aZSsOL2oxrKeiQbZeH2o= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Jan Hubicka To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r14-5679] optimize std::vector::push_back X-Act-Checkin: gcc X-Git-Author: Jan Hubicka X-Git-Refname: refs/heads/master X-Git-Oldrev: 1250858ac9c1426da06116823bd3e1fca64c7d56 X-Git-Newrev: 1d82fc2e6824bf83159389729c31a942f7b91b04 Message-Id: <20231121141747.BA8B23858D35@sourceware.org> Date: Tue, 21 Nov 2023 14:17:47 +0000 (GMT) List-Id: https://gcc.gnu.org/g:1d82fc2e6824bf83159389729c31a942f7b91b04 commit r14-5679-g1d82fc2e6824bf83159389729c31a942f7b91b04 Author: Jan Hubicka Date: Tue Nov 21 15:17:16 2023 +0100 optimize std::vector::push_back this patch speeds up the push_back at -O3 significantly by making the reallocation to be inlined by default. _M_realloc_insert is general insertion that takes iterator pointing to location where the value should be inserted. As such it contains code to move other entries around that is quite large. Since appending to the end of array is common operation, I think we should have specialized code for that. Sadly it is really hard to work out this from IPA passes, since we basically care whether the iterator points to the same place as the end pointer, which are both passed by reference. This is inter-procedural value numbering that is quite out of reach. I also added extra check making it clear that the new length of the vector is non-zero. This saves extra conditionals. Again it is quite hard case since _M_check_len seem to be able to return 0 if its parameter is 0. This never happens here, but we are not able to propagate this early nor at IPA stage. libstdc++-v3/ChangeLog: PR libstdc++/110287 PR middle-end/109811 PR middle-end/109849 * include/bits/stl_vector.h (_M_realloc_append): New member function. (push_back): Use it. * include/bits/vector.tcc: (emplace_back): Use it. (_M_realloc_insert): Let compiler know that new vector size is non-zero. (_M_realloc_append): New member function. Diff: --- libstdc++-v3/include/bits/stl_vector.h | 10 ++- libstdc++-v3/include/bits/vector.tcc | 125 ++++++++++++++++++++++++++++++++- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index 5e18f6eedce..973f4d7e2e9 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -1288,7 +1288,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_ASAN_ANNOTATE_GREW(1); } else - _M_realloc_insert(end(), __x); + _M_realloc_append(__x); } #if __cplusplus >= 201103L @@ -1822,6 +1822,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER void _M_realloc_insert(iterator __position, const value_type& __x); + + void + _M_realloc_append(const value_type& __x); #else // A value_type object constructed with _Alloc_traits::construct() // and destroyed with _Alloc_traits::destroy(). @@ -1871,6 +1874,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER void _M_realloc_insert(iterator __position, _Args&&... __args); + template + _GLIBCXX20_CONSTEXPR + void + _M_realloc_append(_Args&&... __args); + // Either move-construct at the end, or forward to _M_insert_aux. _GLIBCXX20_CONSTEXPR iterator diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index 80631d1e2a1..0ccef7911b3 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_ASAN_ANNOTATE_GREW(1); } else - _M_realloc_insert(end(), std::forward<_Args>(__args)...); + _M_realloc_append(std::forward<_Args>(__args)...); #if __cplusplus > 201402L return back(); #endif @@ -459,6 +459,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER #endif { const size_type __len = _M_check_len(1u, "vector::_M_realloc_insert"); + if (__len <= 0) + __builtin_unreachable (); pointer __old_start = this->_M_impl._M_start; pointer __old_finish = this->_M_impl._M_finish; const size_type __elems_before = __position - begin(); @@ -571,6 +573,127 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER this->_M_impl._M_end_of_storage = __new_start + __len; } +#if __cplusplus >= 201103L + template + template + _GLIBCXX20_CONSTEXPR + void + vector<_Tp, _Alloc>:: + _M_realloc_append(_Args&&... __args) +#else + template + void + vector<_Tp, _Alloc>:: + _M_realloc_append(const _Tp& __x) +#endif + { + const size_type __len = _M_check_len(1u, "vector::_M_realloc_append"); + if (__len <= 0) + __builtin_unreachable (); + pointer __old_start = this->_M_impl._M_start; + pointer __old_finish = this->_M_impl._M_finish; + const size_type __elems = end() - begin(); + pointer __new_start(this->_M_allocate(__len)); + pointer __new_finish(__new_start); + + // RAII guard for allocated storage. + struct _Guard + { + pointer _M_storage; // Storage to deallocate + size_type _M_len; + _Tp_alloc_type& _M_alloc; + + _GLIBCXX20_CONSTEXPR + _Guard(pointer __s, size_type __l, _Tp_alloc_type& __a) + : _M_storage(__s), _M_len(__l), _M_alloc(__a) + { } + + _GLIBCXX20_CONSTEXPR + ~_Guard() + { + if (_M_storage) + __gnu_cxx::__alloc_traits<_Tp_alloc_type>:: + deallocate(_M_alloc, _M_storage, _M_len); + } + + private: + _Guard(const _Guard&); + }; + + { + _Guard __guard(__new_start, __len, _M_impl); + + // The order of the three operations is dictated by the C++11 + // case, where the moves could alter a new element belonging + // to the existing vector. This is an issue only for callers + // taking the element by lvalue ref (see last bullet of C++11 + // [res.on.arguments]). + + // If this throws, the existing elements are unchanged. +#if __cplusplus >= 201103L + _Alloc_traits::construct(this->_M_impl, + std::__to_address(__new_start + __elems), + std::forward<_Args>(__args)...); +#else + _Alloc_traits::construct(this->_M_impl, + __new_start + __elems, + __x); +#endif + +#if __cplusplus >= 201103L + if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) + { + // Relocation cannot throw. + __new_finish = _S_relocate(__old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); + ++__new_finish; + } + else +#endif + { + // RAII type to destroy initialized elements. + struct _Guard_elts + { + pointer _M_first, _M_last; // Elements to destroy + _Tp_alloc_type& _M_alloc; + + _GLIBCXX20_CONSTEXPR + _Guard_elts(pointer __elt, _Tp_alloc_type& __a) + : _M_first(__elt), _M_last(__elt + 1), _M_alloc(__a) + { } + + _GLIBCXX20_CONSTEXPR + ~_Guard_elts() + { std::_Destroy(_M_first, _M_last, _M_alloc); } + + private: + _Guard_elts(const _Guard_elts&); + }; + + // Guard the new element so it will be destroyed if anything throws. + _Guard_elts __guard_elts(__new_start + __elems, _M_impl); + + __new_finish = std::__uninitialized_move_if_noexcept_a( + __old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); + + ++__new_finish; + + // New storage has been fully initialized, destroy the old elements. + __guard_elts._M_first = __old_start; + __guard_elts._M_last = __old_finish; + } + __guard._M_storage = __old_start; + __guard._M_len = this->_M_impl._M_end_of_storage - __old_start; + } + // deallocate should be called before assignments to _M_impl, + // to avoid call-clobbering + + this->_M_impl._M_start = __new_start; + this->_M_impl._M_finish = __new_finish; + this->_M_impl._M_end_of_storage = __new_start + __len; + } + template _GLIBCXX20_CONSTEXPR void