From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1720) id C2B9C3858D37; Mon, 23 Oct 2023 20:24:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org C2B9C3858D37 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1698092654; bh=K3O8BizZ5z5BkpH2FSKvK1acFrw1eOOgaz5fubNTnqI=; h=From:To:Subject:Date:From; b=JxZesNZBxbM2PnJKGT7u/46eTQBa1iUbrCAQcI0QUTxMBwXevZ7a7haPSZIxvMtu+ iCROaLbaVNbHV7pEYgE+5NI4Q1e1k9rkngUPnmHWxO1akve0PNH6yqyzZH0NDHWqUz nvnK4UpLGTS9z42pSjD+HHRGsZZs+kdHo6S1CLD4= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Francois Dumont To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r13-7977] libstdc++: [_Hashtable] Do not reuse untrusted cached hash code X-Act-Checkin: gcc X-Git-Author: =?utf-8?q?Fran=C3=A7ois_Dumont?= X-Git-Refname: refs/heads/releases/gcc-13 X-Git-Oldrev: 4e1320e4af413b7126f3b998096f60f8b7d5cb32 X-Git-Newrev: 25306f0590686bd8ba3f041d94c78bc744d92dd1 Message-Id: <20231023202414.C2B9C3858D37@sourceware.org> Date: Mon, 23 Oct 2023 20:24:14 +0000 (GMT) List-Id: https://gcc.gnu.org/g:25306f0590686bd8ba3f041d94c78bc744d92dd1 commit r13-7977-g25306f0590686bd8ba3f041d94c78bc744d92dd1 Author: François Dumont Date: Wed Oct 18 19:35:32 2023 +0200 libstdc++: [_Hashtable] Do not reuse untrusted cached hash code On merge, reuse a merged node's possibly cached hash code only if we are on the same type of hash and this hash is stateless. Usage of function pointers or std::function as hash functor will prevent reusing cached hash code. libstdc++-v3/ChangeLog * include/bits/hashtable_policy.h (_Hash_code_base::_M_hash_code(const _Hash&, const _Hash_node_value<>&)): Remove. (_Hash_code_base::_M_hash_code<_H2>(const _H2&, const _Hash_node_value<>&)): Remove. * include/bits/hashtable.h (_M_src_hash_code<_H2>(const _H2&, const key_type&, const __node_value_type&)): New. (_M_merge_unique<>, _M_merge_multi<>): Use latter. * testsuite/23_containers/unordered_map/modifiers/merge.cc (test04, test05, test06): New test cases. Diff: --- libstdc++-v3/include/bits/hashtable.h | 19 ++- libstdc++-v3/include/bits/hashtable_policy.h | 13 -- .../23_containers/unordered_map/modifiers/merge.cc | 178 ++++++++++++++++++++- 3 files changed, 190 insertions(+), 20 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index d2ff15320fc8..dd3e655866ac 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1066,6 +1066,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return { __n, this->_M_node_allocator() }; } + // Only use the possibly cached node's hash code if its hash function + // _H2 matches _Hash and is stateless. Otherwise recompute it using _Hash. + template + __hash_code + _M_src_hash_code(const _H2&, const key_type& __k, + const __node_value_type& __src_n) const + { + if constexpr (std::is_same_v<_H2, _Hash>) + if constexpr (std::is_empty_v<_Hash>) + return this->_M_hash_code(__src_n); + + return this->_M_hash_code(__k); + } + public: // Extract a node. node_type @@ -1103,7 +1117,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __pos = __i++; const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); size_type __bkt = _M_bucket_index(__code); if (_M_find_node(__bkt, __k, __code) == nullptr) { @@ -1131,8 +1145,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; + const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); auto __nh = __src.extract(__pos); __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh._M_ptr = nullptr; diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 2f79394d89ec..2528dbe21091 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1319,19 +1319,6 @@ namespace __detail return _M_hash()(__k); } - __hash_code - _M_hash_code(const _Hash&, - const _Hash_node_value<_Value, true>& __n) const - { return __n._M_hash_code; } - - // Compute hash code using _Hash as __n _M_hash_code, if present, was - // computed using _H2. - template - __hash_code - _M_hash_code(const _H2&, - const _Hash_node_value<_Value, __cache_hash_code>& __n) const - { return _M_hash_code(_ExtractKey{}(__n._M_v())); } - __hash_code _M_hash_code(const _Hash_node_value<_Value, false>& __n) const { return _M_hash_code(_ExtractKey{}(__n._M_v())); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc index b140ce452aab..c051b58137a4 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc @@ -17,15 +17,29 @@ // { dg-do run { target c++17 } } +#include +#include #include #include #include using test_type = std::unordered_map; -struct hash { - auto operator()(int i) const noexcept { return ~std::hash()(i); } -}; +template + struct xhash + { + auto operator()(const T& i) const noexcept + { return ~std::hash()(i); } + }; + + +namespace std +{ + template + struct __is_fast_hash> : __is_fast_hash> + { }; +} + struct equal : std::equal_to<> { }; template @@ -64,7 +78,7 @@ test02() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_map c2( c0.begin(), c0.end() ); + std::unordered_map, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); @@ -89,7 +103,7 @@ test03() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_multimap c2( c0.begin(), c0.end() ); + std::unordered_multimap, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); VERIFY( equal_elements(c2, c0) ); @@ -125,10 +139,164 @@ test03() VERIFY( c2.empty() ); } +void +test04() +{ + const std::unordered_map c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map c1 = c0; + std::unordered_multimap c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +void +test05() +{ + const std::unordered_map c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map c1 = c0; + std::unordered_multimap, equal> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +template + using hash_f = + std::function; + +std::size_t +hash_func(const std::string& str) +{ return std::hash{}(str); } + +std::size_t +xhash_func(const std::string& str) +{ return xhash{}(str); } + +namespace std +{ + template + struct __is_fast_hash> : __is_fast_hash> + { }; +} + +void +test06() +{ + const std::unordered_map, equal> + c0({ {"one", 10}, {"two", 20}, {"three", 30} }, 3, &hash_func); + + std::unordered_map, equal> + c1(3, &hash_func); + c1 = c0; + std::unordered_multimap, equal> + c2(c0.begin(), c0.end(), 3, &xhash_func); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + int main() { test01(); test02(); test03(); + test04(); + test05(); + test06(); }