From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1720) id 059273858D1E; Thu, 19 Oct 2023 17:07:42 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 059273858D1E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1697735262; bh=fbe7Vjoc3bEA2dsIFvsq+LldJpG7Hw/sexaPeOuqsew=; h=From:To:Subject:Date:From; b=vaqUWghh1EvKMXN91uYp4ERIQNRQcZ1HyR4bTOp7sVRA23yyMCv202MCtoLLTnnl9 uxW8h5ILYqpJTe3WJTEABHava3EQLOnxVO9vIRxGxMdxFNb0iioy0MGXbxUzy+JNLP z2nLNcZHY0ZHhfx9ODXDCVyK0NAyCoE2rkysf+qM= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Francois Dumont To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r14-4760] libstdc++: [_Hashtable] Do not reuse untrusted cached hash code X-Act-Checkin: gcc X-Git-Author: =?utf-8?q?Fran=C3=A7ois_Dumont?= X-Git-Refname: refs/heads/master X-Git-Oldrev: 2454ba9e2d1ce2d1b9b2b46f6111e022364bf9b5 X-Git-Newrev: c714b4d30d229e2b21064f0ce1e2fa3259fe06c0 Message-Id: <20231019170742.059273858D1E@sourceware.org> Date: Thu, 19 Oct 2023 17:07:42 +0000 (GMT) List-Id: https://gcc.gnu.org/g:c714b4d30d229e2b21064f0ce1e2fa3259fe06c0 commit r14-4760-gc714b4d30d229e2b21064f0ce1e2fa3259fe06c0 Author: François Dumont Date: Wed Oct 18 19:35:32 2023 +0200 libstdc++: [_Hashtable] Do not reuse untrusted cached hash code On merge, reuse a merged node's possibly cached hash code only if we are on the same type of hash and this hash is stateless. Usage of function pointers or std::function as hash functor will prevent reusing cached hash code. libstdc++-v3/ChangeLog * include/bits/hashtable_policy.h (_Hash_code_base::_M_hash_code(const _Hash&, const _Hash_node_value<>&)): Remove. (_Hash_code_base::_M_hash_code<_H2>(const _H2&, const _Hash_node_value<>&)): Remove. * include/bits/hashtable.h (_M_src_hash_code<_H2>(const _H2&, const key_type&, const __node_value_type&)): New. (_M_merge_unique<>, _M_merge_multi<>): Use latter. * testsuite/23_containers/unordered_map/modifiers/merge.cc (test04, test05, test06): New test cases. Diff: --- libstdc++-v3/include/bits/hashtable.h | 19 ++- libstdc++-v3/include/bits/hashtable_policy.h | 13 -- .../23_containers/unordered_map/modifiers/merge.cc | 178 ++++++++++++++++++++- 3 files changed, 190 insertions(+), 20 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 4c12dc895b2a..0857448f7edf 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1109,6 +1109,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return { __n, this->_M_node_allocator() }; } + // Only use the possibly cached node's hash code if its hash function + // _H2 matches _Hash and is stateless. Otherwise recompute it using _Hash. + template + __hash_code + _M_src_hash_code(const _H2&, const key_type& __k, + const __node_value_type& __src_n) const + { + if constexpr (std::is_same_v<_H2, _Hash>) + if constexpr (std::is_empty_v<_Hash>) + return this->_M_hash_code(__src_n); + + return this->_M_hash_code(__k); + } + public: // Extract a node. node_type @@ -1146,7 +1160,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __pos = __i++; const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); size_type __bkt = _M_bucket_index(__code); if (_M_find_node(__bkt, __k, __code) == nullptr) { @@ -1174,8 +1188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; + const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); auto __nh = __src.extract(__pos); __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh._M_ptr = nullptr; diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 86b32fb15f23..5d162463dc35 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1319,19 +1319,6 @@ namespace __detail return _M_hash()(__k); } - __hash_code - _M_hash_code(const _Hash&, - const _Hash_node_value<_Value, true>& __n) const - { return __n._M_hash_code; } - - // Compute hash code using _Hash as __n _M_hash_code, if present, was - // computed using _H2. - template - __hash_code - _M_hash_code(const _H2&, - const _Hash_node_value<_Value, __cache_hash_code>& __n) const - { return _M_hash_code(_ExtractKey{}(__n._M_v())); } - __hash_code _M_hash_code(const _Hash_node_value<_Value, false>& __n) const { return _M_hash_code(_ExtractKey{}(__n._M_v())); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc index b140ce452aab..c051b58137a4 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc @@ -17,15 +17,29 @@ // { dg-do run { target c++17 } } +#include +#include #include #include #include using test_type = std::unordered_map; -struct hash { - auto operator()(int i) const noexcept { return ~std::hash()(i); } -}; +template + struct xhash + { + auto operator()(const T& i) const noexcept + { return ~std::hash()(i); } + }; + + +namespace std +{ + template + struct __is_fast_hash> : __is_fast_hash> + { }; +} + struct equal : std::equal_to<> { }; template @@ -64,7 +78,7 @@ test02() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_map c2( c0.begin(), c0.end() ); + std::unordered_map, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); @@ -89,7 +103,7 @@ test03() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_multimap c2( c0.begin(), c0.end() ); + std::unordered_multimap, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); VERIFY( equal_elements(c2, c0) ); @@ -125,10 +139,164 @@ test03() VERIFY( c2.empty() ); } +void +test04() +{ + const std::unordered_map c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map c1 = c0; + std::unordered_multimap c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +void +test05() +{ + const std::unordered_map c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map c1 = c0; + std::unordered_multimap, equal> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +template + using hash_f = + std::function; + +std::size_t +hash_func(const std::string& str) +{ return std::hash{}(str); } + +std::size_t +xhash_func(const std::string& str) +{ return xhash{}(str); } + +namespace std +{ + template + struct __is_fast_hash> : __is_fast_hash> + { }; +} + +void +test06() +{ + const std::unordered_map, equal> + c0({ {"one", 10}, {"two", 20}, {"three", 30} }, 3, &hash_func); + + std::unordered_map, equal> + c1(3, &hash_func); + c1 = c0; + std::unordered_multimap, equal> + c2(c0.begin(), c0.end(), 3, &xhash_func); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + int main() { test01(); test02(); test03(); + test04(); + test05(); + test06(); }