From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id A1E163858CDA; Tue, 23 Apr 2024 10:51:15 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A1E163858CDA DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1713869475; bh=8niK7eqjHVFXOx3OQMX1ZgrAoc+T6eWLfH28JtfDCWk=; h=From:To:Subject:Date:In-Reply-To:References:From; b=GfCf4E1B6PtZwa2qk6AXG8QotMAUlm11Q4wH/2d1ShBK6FfRislEyJPxk1HlAGFuH bMABWmibjXaqO4m3iFm97LIa2nIkM9bTBCbGe8zKw9u/I06VNxRpuxZW30BYehpxzh av7m3AhKd7LNj7QOM68nvybhNJNHsscB5PickxxQ= From: "redi at gcc dot gnu.org" To: gcc-bugs@gcc.gnu.org Subject: [Bug libstdc++/114821] _M_realloc_append should use memcpy instead of loop to copy data when possible Date: Tue, 23 Apr 2024 10:51:15 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: changed X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: libstdc++ X-Bugzilla-Version: 14.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: redi at gcc dot gnu.org X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: Message-ID: In-Reply-To: References: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D114821 --- Comment #4 from Jonathan Wakely --- (In reply to Jan Hubicka from comment #2) > --- a/libstdc++-v3/include/bits/stl_uninitialized.h > +++ b/libstdc++-v3/include/bits/stl_uninitialized.h > @@ -1130,7 +1130,58 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > } > return __result + __count; > } > + > + template > + _GLIBCXX20_CONSTEXPR > + inline __enable_if_t::value, _Tp*> > + __relocate_a(_Tp * __restrict __first, _Tp *__last, > + _Tp * __restrict __result, _Allocator& __alloc) noexcept This is wrong, we can't optimize arbitrary allocators, only when the alloca= tor is std::allocator<_Tp>. That's what the existing code is doing with the indirection from __relocate_a to __relocate_a_1. > + { > + ptrdiff_t __count =3D __last - __first; > + if (__count > 0) > + { > +#ifdef __cpp_lib_is_constant_evaluated > + if (std::is_constant_evaluated()) > + { > + for (; __first !=3D __last; ++__first, (void)++__result) You don't need the (void) case here because __first and __result are both pointers. That's only needed for the generic __relocate_a that deals with arbitrary iterator types. > + { > + // manually inline relocate_object_a to not lose restri= ct > qualifiers We don't care about restrict when is_constant_evaluated is true, we're not optimizing this code. It just gets interpreted at compile time. There is no reason to inline __relocate_object_a for the is_constant_evaluated case. > + typedef std::allocator_traits<_Allocator> __traits; > + __traits::construct(__alloc, __result, > std::move(*__first)); > + __traits::destroy(__alloc, std::__addressof(*__first)); > + } > + return __result; > + } > #endif > + __builtin_memcpy(__result, __first, __count * sizeof(_Tp)); > + } > + return __result + __count; > + } > +#endif > + > + template > + _GLIBCXX20_CONSTEXPR > +#if _GLIBCXX_HOSTED > + inline __enable_if_t::value, _Tp= *> > +#else > + inline _Tp * > +#endif > + __relocate_a(_Tp * __restrict __first, _Tp *__last, > + _Tp * __restrict __result, _Allocator& __alloc) > + noexcept(noexcept(std::allocator_traits<_Allocator>::construct(__all= oc, > + __result, std::move(*__first))) > + && noexcept(std::allocator_traits<_Allocator>::destroy( > + __alloc, std::__addressof(*__first)))) > + { > + for (; __first !=3D __last; ++__first, (void)++__result) > + { > + // manually inline relocate_object_a to not lose restrict > qualifiers > + typedef std::allocator_traits<_Allocator> __traits; > + __traits::construct(__alloc, __result, std::move(*__first)); > + __traits::destroy(__alloc, std::__addressof(*__first)); > + } I don't understand this overload at all. Is this overload the one that actu= ally gets used for your testcase? I think it should be, because std::pair is not bitwise_relocatable. Why does the restrict qualifier get lost if we don't inline relocate_object= _a? That function is restrict-qualified as well. > + return __result; > + } >=20=20 > template typename _Allocator>=