From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id B0453385734F for ; Thu, 26 Oct 2023 10:43:59 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org B0453385734F Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=redhat.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=redhat.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org B0453385734F Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=170.10.129.124 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698317041; cv=none; b=TyOHxIHUBxfvaQMJMAEhCpya2v6UiHkH80Wdmj86xYmOgPO0tze+UpXeYLIjMk5zh4PGu44MuAmoGBhvW+GqRdMmDzKG9oiL4C+RP7vNkE9tFruu+D4IjV4qi1p7ld/ATmqrJIn5zv4b8y4OryzxFFoqSz9al49VbTzPdM8umek= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698317041; c=relaxed/simple; bh=I6GGtwvgvdqi8In0EhFwpy7CVttor4W2MgBNVSmt8jA=; h=DKIM-Signature:Date:From:To:Subject:Message-ID:MIME-Version; b=EErj0K9Wr9HkZH47kC6D86Xt7KXXhh73rzC2XWn3fQrotP+E1d0zIPEb16TYOnNKuBevVUiVbsLjsee0ojU9UjxMuSUP+fFEQ87UgMReFKW4g/onAVlCicrELTDmgGG/zNfmW6OhDx3s8ZO8Xj6cUaWkyWlTRzm+be8MtJpYcas= ARC-Authentication-Results: i=1; server2.sourceware.org DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1698317039; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=XZgB7aj4rtcjlmOl6chQuq55dMjG0iOxEbDYRcaZJDk=; b=iD36V4IxKoxrcxiOOaaDiLQcL9wGfeGXXQZoHs80GakzQSCU713rqn3ZVc4cGUejAbRVWt xb7sq2c2syfH10r/pyrXBNZSKIBLNN4JxnX4xSNbon+tY1/ZVcUtEC0ozXzB03sn3S0Aug jKqW3GtqQ1RAPQ/PYRqruFSppDBcBH4= Received: from mimecast-mx02.redhat.com (mx-ext.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-471-SF58hh2OOkmofWZlCP8Icg-1; Thu, 26 Oct 2023 06:43:56 -0400 X-MC-Unique: SF58hh2OOkmofWZlCP8Icg-1 Received: from smtp.corp.redhat.com (int-mx04.intmail.prod.int.rdu2.redhat.com [10.11.54.4]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 1A4062825EB3; Thu, 26 Oct 2023 10:43:56 +0000 (UTC) Received: from localhost (unknown [10.42.28.221]) by smtp.corp.redhat.com (Postfix) with ESMTP id BCE802026D4C; Thu, 26 Oct 2023 10:43:55 +0000 (UTC) Date: Thu, 26 Oct 2023 11:43:55 +0100 From: Jonathan Wakely To: =?iso-8859-1?Q?Fran=E7ois?= Dumont Cc: libstdc++ , gcc-patches Subject: Re: [PATCH][_Hashtable] Use RAII to restore Rehash state Message-ID: References: <7f61df18-dd99-4ff5-9fcd-8ca7820403d4@gmail.com> MIME-Version: 1.0 In-Reply-To: <7f61df18-dd99-4ff5-9fcd-8ca7820403d4@gmail.com> X-Clacks-Overhead: GNU Terry Pratchett X-Scanned-By: MIMEDefang 3.4.1 on 10.11.54.4 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=iso-8859-1; format=flowed Content-Disposition: inline Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-10.6 required=5.0 tests=BAYES_00,BODY_8BITS,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H4,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=unavailable autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On 26/10/23 07:18 +0200, François Dumont wrote: >    libstdc++: [_Hashtable] Use RAII type to manage rehash functor state > >    Replace usage of __try/__catch with a RAII type to restore rehash >functor >    state when needed. Generally I really like replacing try-catch with RAII but I have some questions below. >    libstdc++-v3/ChangeLog: > >            * include/bits/hashtable_policy.h (_RehashStateGuard): New. >            (_Insert_base<>::_M_insert_range(_IIt, _IIt, const >_NodeGet&, false_type)): >            Adapt. >            * include/bits/hashtable.h (__rehash_guard_t): New. >            (__rehash_state): Remove. >            (_M_rehash): Remove. >            (_M_rehash_aux): Rename into _M_rehash. >            (_M_assign_elements, _M_insert_unique_node, >_M_insert_multi_node): Adapt. >            (rehash): Adapt. > > >Tested under Linux x64. > >Ok to commit ? > >François >diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h >index 0857448f7ed..64071ac1fb2 100644 >--- a/libstdc++-v3/include/bits/hashtable.h >+++ b/libstdc++-v3/include/bits/hashtable.h >@@ -234,6 +234,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > _RehashPolicy, _Traits>; > using __enable_default_ctor > = _Hashtable_enable_default_ctor<_Equal, _Hash, _Alloc>; >+ using __rehash_guard_t >+ = __detail::_RehashStateGuard<_RehashPolicy>; > > public: > typedef _Key key_type; >@@ -264,7 +266,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > private: > using __rehash_type = _RehashPolicy; >- using __rehash_state = typename __rehash_type::_State; > > using __unique_keys = typename __traits_type::__unique_keys; > >@@ -1200,14 +1201,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > > private: > // Helper rehash method used when keys are unique. >- void _M_rehash_aux(size_type __bkt_count, true_type __uks); >+ void _M_rehash(size_type __bkt_count, true_type __uks); > > // Helper rehash method used when keys can be non-unique. >- void _M_rehash_aux(size_type __bkt_count, false_type __uks); >- >- // Unconditionally change size of bucket array to n, restore >- // hash policy state to __state on exception. >- void _M_rehash(size_type __bkt_count, const __rehash_state& __state); >+ void _M_rehash(size_type __bkt_count, false_type __uks); > }; > > // Definitions of class template _Hashtable's out-of-line member functions. >@@ -1337,7 +1334,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > { > __buckets_ptr __former_buckets = nullptr; > std::size_t __former_bucket_count = _M_bucket_count; >- const __rehash_state& __former_state = _M_rehash_policy._M_state(); >+ __rehash_guard_t __rehash_guard(_M_rehash_policy); > > if (_M_bucket_count != __ht._M_bucket_count) > { >@@ -1359,6 +1356,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > _M_assign(std::forward<_Ht>(__ht), __roan); > if (__former_buckets) > _M_deallocate_buckets(__former_buckets, __former_bucket_count); >+ __rehash_guard._M_reset = false; I find this confusing. Usually "reset" means that something is cleared, so won't take an action in the destructor. e.g. if you use std::unique_ptr::reset() then the object is destroyed immediately, and then nothing happens in the destructor. Here it's the opposite, _M_reset=true means that it _sould_ do something in the destructor. The problem is the ambiguity between "reset the state in the destructor later" and "reset the object to an empty state now". If the member was called _M_guarded then it might be clearer. _M_guarded=true means the guard is active, and will restore the state later. Or _M_active, or _M_armed, or even _M_reset_in_dtor. Any of those names avoids the confusion with the semantics of std::unique_ptr::reset() and similar well-known APIs. Or what I usually do is store a pointer to the guarded object in the RAII guard type, and then just null the pointer to disarm the guard. That means you don't need a separate bool member variable. If _RehashStateGuard::_M_rehash_policy was called _M_guarded_obj and was a _RehashPolicy* instead of _RehashPolicy& then disarming it would be: __rehash_guard._M_guarded_obj = nullptr; This seems clear to me, as it says that the guard no longer has anything to guard, so won't do anything in the destructor. > } > __catch(...) > { >@@ -1366,7 +1364,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > { > // Restore previous buckets. > _M_deallocate_buckets(); >- _M_rehash_policy._M_reset(__former_state); > _M_buckets = __former_buckets; > _M_bucket_count = __former_bucket_count; > } >@@ -2142,17 +2139,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > __node_ptr __node, size_type __n_elt) > -> iterator > { >- const __rehash_state& __saved_state = _M_rehash_policy._M_state(); >+ __rehash_guard_t __rehash_guard(_M_rehash_policy); > std::pair __do_rehash > = _M_rehash_policy._M_need_rehash(_M_bucket_count, _M_element_count, > __n_elt); > > if (__do_rehash.first) > { >- _M_rehash(__do_rehash.second, __saved_state); >+ _M_rehash(__do_rehash.second, true_type{}); > __bkt = _M_bucket_index(__code); > } > >+ __rehash_guard._M_reset = false; > this->_M_store_code(*__node, __code); This changes behaviour. Previously the try-catch was inside the call to _M_rehash. Now we're starting to guard before calling _M_need_rehash, and we run the destructor unconditionally even if we never call _M_rehash. I think that's OK, because _M_need_rehash is noexcept (but it's confusing because we have a const overload of _M_need_rehash which is noexcept(false) and a non-const overload which is noexcept(true) ... should they both be noexcept?) Can we do this instead: - const __rehash_state& __saved_state = _M_rehash_policy._M_state(); std::pair __do_rehash = _M_rehash_policy._M_need_rehash(_M_bucket_count, _M_element_count, __n_elt); if (__do_rehash.first) { + __rehash_guard_t __rehash_guard(_M_rehash_policy); - _M_rehash(__do_rehash.second, __saved_state); + _M_rehash(__do_rehash.second, true_type{}); + __rehash_guard._M_reset = false; __bkt = _M_bucket_index(__code); } this->_M_store_code(*__node, __code); This only creates a guard object if we're actually going to rehash, so we don't run its destructor (and check if we need t restore anything) when no rehash is needed. N.B. _M_bucket_index is also confusing. There are two overloads of it, one is noexcept and one isn't. The one that we call here is noexcept(false), so that call could throw ... is it OK that we don't restore the old state if that happens? Should that be guarded too? (I have no idea). Also, I see that the noexcept(true) overload of _M_bucket_index calls __hash_code_base::_M_bucket_index, which is noexcept(/* condition */), so the caller says it cannot throw, but it calls something that sometimes throws. Is that correct? > > // Always insert at the beginning of the bucket. >@@ -2172,13 +2170,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > __hash_code __code, __node_ptr __node) > -> iterator > { >- const __rehash_state& __saved_state = _M_rehash_policy._M_state(); >+ __rehash_guard_t __rehash_guard(_M_rehash_policy); > std::pair __do_rehash > = _M_rehash_policy._M_need_rehash(_M_bucket_count, _M_element_count, 1); > > if (__do_rehash.first) >- _M_rehash(__do_rehash.second, __saved_state); >+ _M_rehash(__do_rehash.second, false_type{}); > >+ __rehash_guard._M_reset = false; Again, we're creating the guard object in a larger scope than needed. > this->_M_store_code(*__node, __code); > const key_type& __k = _ExtractKey{}(__node->_M_v()); > size_type __bkt = _M_bucket_index(__code); >@@ -2509,39 +2508,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: > rehash(size_type __bkt_count) > { >- const __rehash_state& __saved_state = _M_rehash_policy._M_state(); >+ __rehash_guard_t __rehash_guard(_M_rehash_policy); > __bkt_count > = std::max(_M_rehash_policy._M_bkt_for_elements(_M_element_count + 1), > __bkt_count); > __bkt_count = _M_rehash_policy._M_next_bkt(__bkt_count); > > if (__bkt_count != _M_bucket_count) >- _M_rehash(__bkt_count, __saved_state); >- else >- // No rehash, restore previous state to keep it consistent with >- // container state. >- _M_rehash_policy._M_reset(__saved_state); >- } >- >- template- typename _ExtractKey, typename _Equal, >- typename _Hash, typename _RangeHash, typename _Unused, >- typename _RehashPolicy, typename _Traits> >- void >- _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, >- _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: >- _M_rehash(size_type __bkt_count, const __rehash_state& __state) >- { >- __try >- { >- _M_rehash_aux(__bkt_count, __unique_keys{}); >- } >- __catch(...) > { >- // A failure here means that buckets allocation failed. We only >- // have to restore hash policy previous state. >- _M_rehash_policy._M_reset(__state); >- __throw_exception_again; >+ _M_rehash(__bkt_count, __unique_keys{}); >+ __rehash_guard._M_reset = false; And a larger scope again. > } > } > >@@ -2553,7 +2529,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > void > _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, > _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: >- _M_rehash_aux(size_type __bkt_count, true_type /* __uks */) >+ _M_rehash(size_type __bkt_count, true_type /* __uks */) > { > __buckets_ptr __new_buckets = _M_allocate_buckets(__bkt_count); > __node_ptr __p = _M_begin(); >@@ -2596,7 +2572,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > void > _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, > _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: >- _M_rehash_aux(size_type __bkt_count, false_type /* __uks */) >+ _M_rehash(size_type __bkt_count, false_type /* __uks */) > { > __buckets_ptr __new_buckets = _M_allocate_buckets(__bkt_count); > __node_ptr __p = _M_begin(); >diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h >index 5d162463dc3..8b9626b1575 100644 >--- a/libstdc++-v3/include/bits/hashtable_policy.h >+++ b/libstdc++-v3/include/bits/hashtable_policy.h >@@ -715,6 +715,26 @@ namespace __detail > std::size_t _M_next_resize; > }; > >+ template >+ struct _RehashStateGuard >+ { >+ _RehashPolicy& _M_rehash_policy; >+ typename _RehashPolicy::_State _M_prev_state; >+ bool _M_reset = true; This could be: + _RehashPolicy* _M_guarded_obj; + typename _RehashPolicy::_State _M_prev_state; >+ >+ _RehashStateGuard(_RehashPolicy& __policy) >+ : _M_rehash_policy(__policy) >+ , _M_prev_state(__policy._M_state()) + _RehashStateGuard(_RehashPolicy& __policy) + : _M_guarded_obj(std::__addressof(__policy)) + , _M_prev_state(__policy._M_state()) >+ { } >+ _RehashStateGuard(const _RehashStateGuard&) = delete; >+ >+ ~_RehashStateGuard() >+ { >+ if (_M_reset) >+ _M_rehash_policy._M_reset(_M_prev_state); + if (_M_guarded_obj) + _M_guarded_obj->_M_reset(_M_prev_state); >+ } >+ }; >+ > // Base classes for std::_Hashtable. We define these base classes > // because in some cases we want to do different things depending on > // the value of a policy class. In some cases the policy class >@@ -1007,7 +1027,7 @@ namespace __detail > const _NodeGetter& __node_gen, false_type __uks) > { > using __rehash_type = typename __hashtable::__rehash_type; >- using __rehash_state = typename __hashtable::__rehash_state; >+ using __rehash_guard_t = typename __hashtable::__rehash_guard_t; > using pair_type = std::pair; > > size_type __n_elt = __detail::__distance_fw(__first, __last); >@@ -1016,14 +1036,15 @@ namespace __detail > > __hashtable& __h = _M_conjure_hashtable(); > __rehash_type& __rehash = __h._M_rehash_policy; >- const __rehash_state& __saved_state = __rehash._M_state(); >+ __rehash_guard_t __rehash_guard(__rehash); > pair_type __do_rehash = __rehash._M_need_rehash(__h._M_bucket_count, > __h._M_element_count, > __n_elt); > > if (__do_rehash.first) >- __h._M_rehash(__do_rehash.second, __saved_state); >+ __h._M_rehash(__do_rehash.second, __uks); > >+ __rehash_guard._M_reset = false; A larger scope again. > for (; __first != __last; ++__first) > __h._M_insert(*__first, __node_gen, __uks); > }