From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from nikam.ms.mff.cuni.cz (nikam.ms.mff.cuni.cz [195.113.20.16]) by sourceware.org (Postfix) with ESMTPS id F0DB83858D33; Tue, 21 Nov 2023 12:50:46 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org F0DB83858D33 Authentication-Results: sourceware.org; dmarc=fail (p=none dis=none) header.from=ucw.cz Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=kam.mff.cuni.cz ARC-Filter: OpenARC Filter v1.0.0 sourceware.org F0DB83858D33 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=195.113.20.16 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1700571048; cv=none; b=ZK0q1hF7sBkjtvC3FrsZJZzLl6kjNUJsRrnEF2YB7aybEA2+PkftGA2+JtSz3shoNqCT+29+e1P6or2k4rl//BaH8m5XDxCHT0qI/8sWRuax61KUujwQlACXR2a0rkZxD8Kh2OOv1f5rgAnp6qnNXL+I1D5jq9E5ahXUL8iunOI= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1700571048; c=relaxed/simple; bh=uoaNX1Fcp2yCHZoBBkR4cA1cg6ugpnAW3eDizafT7SQ=; h=DKIM-Signature:Date:From:To:Subject:Message-ID:MIME-Version; b=iqR4S0d/Djf+omtDOWVWpr3e8W5xXX8kGV379PdDFpegbq3e1KGE8CQuYQaGKYyValEnOUAfe2mtUo/RcSX9DypknHck4cxoxha/iL90XxnxQQlPvW7Xptmuuvw2zE6XkaPPS/rODjNA7oj/MsLU2yTKOHIVAYso1e97tLKIqKU= ARC-Authentication-Results: i=1; server2.sourceware.org Received: by nikam.ms.mff.cuni.cz (Postfix, from userid 16202) id 7E19B28B8A3; Tue, 21 Nov 2023 13:50:45 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=ucw.cz; s=gen1; t=1700571045; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: in-reply-to:in-reply-to:references:references; bh=VtNZ4gO6RWZ2g1aqFIerf+iKkVr7QGjxAbaL5X9Tyn8=; b=G3vYDuljZx1qASNhF/pImExj6FWquskwcjQuLune9azcYF6ajrRgGF0aWFgn/o6BJnplYr C39e7KoaLN7cO6dNaI/xCPbH1p8PV9gspJwRAf8QQISykzCU8bzqO0xmLVSEoFwb4sDdYG KceIDPG5MMneWdGgzvEIpbSFxDWTytM= Date: Tue, 21 Nov 2023 13:50:45 +0100 From: Jan Hubicka To: Jonathan Wakely Cc: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Subject: Re: libstdc++: Speed up push_back Message-ID: References: MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: X-Spam-Status: No, score=-11.0 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,GIT_PATCH_0,HEADER_FROM_DIFFERENT_DOMAINS,JMQ_SPF_NEUTRAL,RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: > > + // RAII type to destroy initialized elements. > > There's only one initialized element, not "elements". > > > + struct _Guard_elts > > + { > > + pointer _M_first, _M_last; // Elements to destroy > > We only need to store one pointer here, call it _M_p. > > > + _Tp_alloc_type& _M_alloc; > > + > > + _GLIBCXX20_CONSTEXPR > > + _Guard_elts(pointer __elt, _Tp_alloc_type& __a) > > + : _M_first(__elt), _M_last(__elt + 1), _M_alloc(__a) > > + { } > > + > > + _GLIBCXX20_CONSTEXPR > > + ~_Guard_elts() > > + { std::_Destroy(_M_first, _M_last, _M_alloc); } > > This should be either: > > std::_Destroy(_M_p, _M_p+1, _M_alloc); > > or avoid the loop that happens in that _Destroy function: > > _Alloc_traits::destroy(_M_alloc, _M_p); > > > + > > + private: > > + _Guard_elts(const _Guard_elts&); > > + }; > > + > > + // Guard the new element so it will be destroyed if anything throws. > > + _Guard_elts __guard_elts(__new_start + __elems, _M_impl); > > + > > + __new_finish = std::__uninitialized_move_if_noexcept_a( > > + __old_start, __old_finish, > > + __new_start, _M_get_Tp_allocator()); > > + > > + ++__new_finish; > > + // Guard everything before the new element too. > > + __guard_elts._M_first = __new_start; > > This seems redundant, we're not doing any more insertions now, and so > this store is dead. I am attaching patch with this check removed. However I still think Guard_elts needs to stay to be able to destroy the old ellements > > > + > > + // New storage has been fully initialized, destroy the old elements. > > + __guard_elts._M_first = __old_start; > > + __guard_elts._M_last = __old_finish; ... here Does it look better? (I am not really that confidend with libstdc++). I also was wondering if we do have some data on what libstdc++ functions are perofrmance critical in practice. Given that the push_back can be sped up very noticeably, I wonder if we don't have problems elsewhere? Thank you, Honza diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h index 5e18f6eedce..973f4d7e2e9 100644 --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -1288,7 +1288,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_ASAN_ANNOTATE_GREW(1); } else - _M_realloc_insert(end(), __x); + _M_realloc_append(__x); } #if __cplusplus >= 201103L @@ -1822,6 +1822,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER void _M_realloc_insert(iterator __position, const value_type& __x); + + void + _M_realloc_append(const value_type& __x); #else // A value_type object constructed with _Alloc_traits::construct() // and destroyed with _Alloc_traits::destroy(). @@ -1871,6 +1874,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER void _M_realloc_insert(iterator __position, _Args&&... __args); + template + _GLIBCXX20_CONSTEXPR + void + _M_realloc_append(_Args&&... __args); + // Either move-construct at the end, or forward to _M_insert_aux. _GLIBCXX20_CONSTEXPR iterator diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index 80631d1e2a1..0ccef7911b3 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_ASAN_ANNOTATE_GREW(1); } else - _M_realloc_insert(end(), std::forward<_Args>(__args)...); + _M_realloc_append(std::forward<_Args>(__args)...); #if __cplusplus > 201402L return back(); #endif @@ -459,6 +459,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER #endif { const size_type __len = _M_check_len(1u, "vector::_M_realloc_insert"); + if (__len <= 0) + __builtin_unreachable (); pointer __old_start = this->_M_impl._M_start; pointer __old_finish = this->_M_impl._M_finish; const size_type __elems_before = __position - begin(); @@ -571,6 +573,127 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER this->_M_impl._M_end_of_storage = __new_start + __len; } +#if __cplusplus >= 201103L + template + template + _GLIBCXX20_CONSTEXPR + void + vector<_Tp, _Alloc>:: + _M_realloc_append(_Args&&... __args) +#else + template + void + vector<_Tp, _Alloc>:: + _M_realloc_append(const _Tp& __x) +#endif + { + const size_type __len = _M_check_len(1u, "vector::_M_realloc_append"); + if (__len <= 0) + __builtin_unreachable (); + pointer __old_start = this->_M_impl._M_start; + pointer __old_finish = this->_M_impl._M_finish; + const size_type __elems = end() - begin(); + pointer __new_start(this->_M_allocate(__len)); + pointer __new_finish(__new_start); + + // RAII guard for allocated storage. + struct _Guard + { + pointer _M_storage; // Storage to deallocate + size_type _M_len; + _Tp_alloc_type& _M_alloc; + + _GLIBCXX20_CONSTEXPR + _Guard(pointer __s, size_type __l, _Tp_alloc_type& __a) + : _M_storage(__s), _M_len(__l), _M_alloc(__a) + { } + + _GLIBCXX20_CONSTEXPR + ~_Guard() + { + if (_M_storage) + __gnu_cxx::__alloc_traits<_Tp_alloc_type>:: + deallocate(_M_alloc, _M_storage, _M_len); + } + + private: + _Guard(const _Guard&); + }; + + { + _Guard __guard(__new_start, __len, _M_impl); + + // The order of the three operations is dictated by the C++11 + // case, where the moves could alter a new element belonging + // to the existing vector. This is an issue only for callers + // taking the element by lvalue ref (see last bullet of C++11 + // [res.on.arguments]). + + // If this throws, the existing elements are unchanged. +#if __cplusplus >= 201103L + _Alloc_traits::construct(this->_M_impl, + std::__to_address(__new_start + __elems), + std::forward<_Args>(__args)...); +#else + _Alloc_traits::construct(this->_M_impl, + __new_start + __elems, + __x); +#endif + +#if __cplusplus >= 201103L + if _GLIBCXX17_CONSTEXPR (_S_use_relocate()) + { + // Relocation cannot throw. + __new_finish = _S_relocate(__old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); + ++__new_finish; + } + else +#endif + { + // RAII type to destroy initialized elements. + struct _Guard_elts + { + pointer _M_first, _M_last; // Elements to destroy + _Tp_alloc_type& _M_alloc; + + _GLIBCXX20_CONSTEXPR + _Guard_elts(pointer __elt, _Tp_alloc_type& __a) + : _M_first(__elt), _M_last(__elt + 1), _M_alloc(__a) + { } + + _GLIBCXX20_CONSTEXPR + ~_Guard_elts() + { std::_Destroy(_M_first, _M_last, _M_alloc); } + + private: + _Guard_elts(const _Guard_elts&); + }; + + // Guard the new element so it will be destroyed if anything throws. + _Guard_elts __guard_elts(__new_start + __elems, _M_impl); + + __new_finish = std::__uninitialized_move_if_noexcept_a( + __old_start, __old_finish, + __new_start, _M_get_Tp_allocator()); + + ++__new_finish; + + // New storage has been fully initialized, destroy the old elements. + __guard_elts._M_first = __old_start; + __guard_elts._M_last = __old_finish; + } + __guard._M_storage = __old_start; + __guard._M_len = this->_M_impl._M_end_of_storage - __old_start; + } + // deallocate should be called before assignments to _M_impl, + // to avoid call-clobbering + + this->_M_impl._M_start = __new_start; + this->_M_impl._M_finish = __new_finish; + this->_M_impl._M_end_of_storage = __new_start + __len; + } + template _GLIBCXX20_CONSTEXPR void