From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wr1-x42a.google.com (mail-wr1-x42a.google.com [IPv6:2a00:1450:4864:20::42a]) by sourceware.org (Postfix) with ESMTPS id 9A7EE3858403; Sun, 14 Nov 2021 13:29:40 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 9A7EE3858403 Received: by mail-wr1-x42a.google.com with SMTP id t30so24933857wra.10; Sun, 14 Nov 2021 05:29:40 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:to:cc:from:subject:message-id:date:user-agent :mime-version:content-language; bh=84kEJhFIN7uQOZyjFi2guhtg8tE0W+LFjKIjQqr8/Tc=; b=nd8yuVG0czBdhhDYBRlNY/j0+EHHqPtvHrFPqNfoARjvGVVwm/32i+FWE8BJEgbNz+ jZVT3GIEkmYBXu+utQ31jN1Lhu9mg4AVvv9VQ8Q/E3ySN5QVNY88zN+Et6MG7k8VkAvx KLA78w6nw3zryqmq25W0PQSySuKb9pVxs6MQ7X3QGcAR+/Cg9sd4R2Yidz/qeQWp+9py bEjATtPoug3XiU9ntJ4M6IA8sNqdMzjvLQJCMY/Gbog3TYqI4DLUY7Uywvb4CfzNw6k/ vPACpo6OyHY6x/yIWJfswWCpahkOPfpr3RfQAhk1sLPTTH6YMC2bbjqd0qDEgNQqRYoX IXuQ== X-Gm-Message-State: AOAM532ge+XJjR8ASfxBh6XkAyHrimVbep3AxKZ8nnkbIuOHp37J5fon sArMvqVsRTFyuhWcg4rcCv4YhvPscW4= X-Google-Smtp-Source: ABdhPJydGjvRBFIYFUgURbiHTp31pptIB4I/sIm0e9B2FgzgUZapx6kcr7ZNiwXqvo4INu3c9n081g== X-Received: by 2002:a5d:6211:: with SMTP id y17mr35933701wru.97.1636896579345; Sun, 14 Nov 2021 05:29:39 -0800 (PST) Received: from ?IPv6:2a01:e0a:1dc:b1c0:f404:a64b:a1ae:2923? 
([2a01:e0a:1dc:b1c0:f404:a64b:a1ae:2923]) by smtp.googlemail.com with ESMTPSA id n8sm11126790wrp.95.2021.11.14.05.29.38 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Sun, 14 Nov 2021 05:29:38 -0800 (PST) To: "libstdc++@gcc.gnu.org" Cc: gcc-patches From: =?UTF-8?Q?Fran=c3=a7ois_Dumont?= Subject: [PATCH] Enhance unordered container merge Message-ID: Date: Sun, 14 Nov 2021 14:29:37 +0100 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.13.0 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------B62ED087389A841285C63BA9" Content-Language: fr X-Spam-Status: No, score=-9.9 required=5.0 tests=BAYES_00, BODY_8BITS, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libstdc++@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libstdc++ mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 14 Nov 2021 13:29:42 -0000 This is a multi-part message in MIME format. --------------B62ED087389A841285C63BA9 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 8bit     libstdc++: Re-use cached hash code when merging unordered containers.     When merging two unordered containers that use the same hasher type, we can     re-use the cached hash code, if any.     For multi containers, use the iterator returned by the previous insert as a hint for the next insert.             * include/bits/hashtable_policy.h (_Hash_code_base<>::_ReuseOrComputeHash<>): New. (_Hash_code_base<>::_M_hash_code<_H2>(const _H2&, const _Hash_node_value<>&)): New.             * include/bits/hashtable.h (_Hashtable<>::_M_merge_unique): Use latter.             (_Hashtable<>::_M_merge_multi): Likewise.             
* testsuite/23_containers/unordered_multiset/modifiers/merge.cc (test05): New test.             * testsuite/23_containers/unordered_set/modifiers/merge.cc (test04): New test. Tested under Linux x86_64. Ok to commit ? François --------------B62ED087389A841285C63BA9 Content-Type: text/x-patch; charset=UTF-8; name="hashtable_merge.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="hashtable_merge.patch" diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 0e949d73614..6e2d4c10cfe 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __pos = __i++; const key_type& __k = _ExtractKey{}(*__pos); - __hash_code __code = this->_M_hash_code(__k); + __hash_code __code + = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); size_type __bkt = _M_bucket_index(__code); if (_M_find_node(__bkt, __k, __code) == nullptr) { @@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION node_type>, "Node types are compatible"); __glibcxx_assert(get_allocator() == __src.get_allocator()); + __node_ptr __hint = nullptr; this->reserve(size() + __src.size()); for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; - const key_type& __k = _ExtractKey{}(*__pos); - __hash_code __code = this->_M_hash_code(__k); + __hash_code __code + = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); auto __nh = __src.extract(__pos); - _M_insert_multi_node(nullptr, __code, __nh._M_ptr); + __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh._M_ptr = nullptr; } } diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index c0295b75963..95a1c45e634 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1217,6 +1217,26 @@ namespace __detail friend struct 
_Local_iterator_base<_Key, _Value, _ExtractKey, _Hash, _RangeHash, _Unused, false>; + template<typename _H1, typename _H2, bool __with_cache> + struct _ReuseOrComputeHash + { + std::size_t + operator()(const _Hash_node_value<_Value, __with_cache>& __n) const + { return _M_hash_code_base._M_hash_code(_ExtractKey{}(__n._M_v())); } + + const _Hash_code_base& _M_hash_code_base; + }; + + template<typename _Hn> + struct _ReuseOrComputeHash<_Hn, _Hn, true> + { + _ReuseOrComputeHash(const _Hash_code_base&) { } + + std::size_t + operator()(const _Hash_node_value<_Value, true>& __n) const + { return __n._M_hash_code; } + }; + public: typedef _Hash hasher; @@ -1250,6 +1270,12 @@ namespace __detail return _M_hash()(__k); } + template<typename _H2> + __hash_code + _M_hash_code(const _H2&, + const _Hash_node_value<_Value, __cache_hash_code>& __n) const + { return _ReuseOrComputeHash<_Hash, _H2, __cache_hash_code>{ *this }(__n); } + std::size_t _M_bucket_index(__hash_code __c, std::size_t __bkt_count) const { return _RangeHash{}(__c, __bkt_count); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc index 1ed2ce234a1..07b8a344169 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc @@ -17,6 +17,7 @@ // { dg-do run { target c++17 } } +#include <string> #include <unordered_set> #include <algorithm> #include <testsuite_hooks.h> @@ -105,6 +106,26 @@ test04() VERIFY( c2.empty() ); } +void +test05() +{ + const std::unordered_multiset<std::string> c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" }; + std::unordered_multiset<std::string> c1 = c0; + std::unordered_set<std::string> c2( c0.begin(), c0.end() ); + + c1.merge(c2); + VERIFY( c1.size() == (1.5 * c0.size()) ); + for (auto& i : c1) + VERIFY( c1.count(i) == (1.5 * c0.count(i)) ); + VERIFY( c2.empty() ); + + c1.clear(); + c2.insert( c0.begin(), c0.end() ); + c1.merge(std::move(c2)); + VERIFY( c1.size() == (0.5 * c0.size()) ); + VERIFY( c2.empty() ); +} + int main() { @@ 
-112,4 +133,5 @@ main() test02(); test03(); test04(); + test05(); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc index c9c8a60fd54..0e184b10c60 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc @@ -17,6 +17,7 @@ // { dg-do run { target c++17 } } +#include <string> #include <unordered_set> #include <algorithm> #include <testsuite_hooks.h> @@ -125,10 +126,52 @@ test03() VERIFY( c2.empty() ); } +void +test04() +{ + const std::unordered_set<std::string> c0{ "abcd", "efgh", "ijkl", }; + std::unordered_set<std::string> c1 = c0; + std::unordered_multiset<std::string> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("abcd") == 2 ); + VERIFY( c2.count("efgh") == 2 ); + VERIFY( c2.count("ijkl") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + int main() { test01(); test02(); test03(); + test04(); } --------------B62ED087389A841285C63BA9--