public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Francois Dumont <fdumont@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org Subject: [gcc r14-4760] libstdc++: [_Hashtable] Do not reuse untrusted cached hash code Date: Thu, 19 Oct 2023 17:07:42 +0000 (GMT) [thread overview] Message-ID: <20231019170742.059273858D1E@sourceware.org> (raw) https://gcc.gnu.org/g:c714b4d30d229e2b21064f0ce1e2fa3259fe06c0 commit r14-4760-gc714b4d30d229e2b21064f0ce1e2fa3259fe06c0 Author: François Dumont <fdumont@gcc.gnu.org> Date: Wed Oct 18 19:35:32 2023 +0200 libstdc++: [_Hashtable] Do not reuse untrusted cached hash code On merge, reuse a merged node's possibly cached hash code only if we are on the same type of hash and this hash is stateless. Usage of function pointers or std::function as hash functor will prevent reusing cached hash code. libstdc++-v3/ChangeLog * include/bits/hashtable_policy.h (_Hash_code_base::_M_hash_code(const _Hash&, const _Hash_node_value<>&)): Remove. (_Hash_code_base::_M_hash_code<_H2>(const _H2&, const _Hash_node_value<>&)): Remove. * include/bits/hashtable.h (_M_src_hash_code<_H2>(const _H2&, const key_type&, const __node_value_type&)): New. (_M_merge_unique<>, _M_merge_multi<>): Use latter. * testsuite/23_containers/unordered_map/modifiers/merge.cc (test04, test05, test06): New test cases. Diff: --- libstdc++-v3/include/bits/hashtable.h | 19 ++- libstdc++-v3/include/bits/hashtable_policy.h | 13 -- .../23_containers/unordered_map/modifiers/merge.cc | 178 ++++++++++++++++++++- 3 files changed, 190 insertions(+), 20 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 4c12dc895b2a..0857448f7edf 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1109,6 +1109,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return { __n, this->_M_node_allocator() }; } + // Only use the possibly cached node's hash code if its hash function + // _H2 matches _Hash and is stateless. Otherwise recompute it using _Hash. + template<typename _H2> + __hash_code + _M_src_hash_code(const _H2&, const key_type& __k, + const __node_value_type& __src_n) const + { + if constexpr (std::is_same_v<_H2, _Hash>) + if constexpr (std::is_empty_v<_Hash>) + return this->_M_hash_code(__src_n); + + return this->_M_hash_code(__k); + } + public: // Extract a node. node_type @@ -1146,7 +1160,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __pos = __i++; const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); size_type __bkt = _M_bucket_index(__code); if (_M_find_node(__bkt, __k, __code) == nullptr) { @@ -1174,8 +1188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; + const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); auto __nh = __src.extract(__pos); __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh._M_ptr = nullptr; diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 86b32fb15f23..5d162463dc35 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1319,19 +1319,6 @@ namespace __detail return _M_hash()(__k); } - __hash_code - _M_hash_code(const _Hash&, - const _Hash_node_value<_Value, true>& __n) const - { return __n._M_hash_code; } - - // Compute hash code using _Hash as __n _M_hash_code, if present, was - // computed using _H2. - template<typename _H2> - __hash_code - _M_hash_code(const _H2&, - const _Hash_node_value<_Value, __cache_hash_code>& __n) const - { return _M_hash_code(_ExtractKey{}(__n._M_v())); } - __hash_code _M_hash_code(const _Hash_node_value<_Value, false>& __n) const { return _M_hash_code(_ExtractKey{}(__n._M_v())); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc index b140ce452aab..c051b58137a4 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc @@ -17,15 +17,29 @@ // { dg-do run { target c++17 } } +#include <string> +#include <functional> #include <unordered_map> #include <algorithm> #include <testsuite_hooks.h> using test_type = std::unordered_map<int, int>; -struct hash { - auto operator()(int i) const noexcept { return ~std::hash<int>()(i); } -}; +template<typename T> + struct xhash + { + auto operator()(const T& i) const noexcept + { return ~std::hash<T>()(i); } + }; + + +namespace std +{ + template<typename T> + struct __is_fast_hash<xhash<T>> : __is_fast_hash<std::hash<T>> + { }; +} + struct equal : std::equal_to<> { }; template<typename C1, typename C2> @@ -64,7 +78,7 @@ test02() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_map<int, int, hash, equal> c2( c0.begin(), c0.end() ); + std::unordered_map<int, int, xhash<int>, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); @@ -89,7 +103,7 @@ test03() { const test_type c0{ {1, 10}, {2, 20}, {3, 30} }; test_type c1 = c0; - std::unordered_multimap<int, int, hash, equal> c2( c0.begin(), c0.end() ); + std::unordered_multimap<int, int, xhash<int>, equal> c2( c0.begin(), c0.end() ); c1.merge(c2); VERIFY( c1 == c0 ); VERIFY( equal_elements(c2, c0) ); @@ -125,10 +139,164 @@ test03() VERIFY( c2.empty() ); } +void +test04() +{ + const std::unordered_map<std::string, int> c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map<std::string, int> c1 = c0; + std::unordered_multimap<std::string, int> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +void +test05() +{ + const std::unordered_map<std::string, int> c0 + { {"one", 10}, {"two", 20}, {"three", 30} }; + + std::unordered_map<std::string, int> c1 = c0; + std::unordered_multimap<std::string, int, xhash<std::string>, equal> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + +template<typename T> + using hash_f = + std::function<std::size_t(const T&)>; + +std::size_t +hash_func(const std::string& str) +{ return std::hash<std::string>{}(str); } + +std::size_t +xhash_func(const std::string& str) +{ return xhash<std::string>{}(str); } + +namespace std +{ + template<typename T> + struct __is_fast_hash<hash_f<T>> : __is_fast_hash<std::hash<T>> + { }; +} + +void +test06() +{ + const std::unordered_map<std::string, int, hash_f<std::string>, equal> + c0({ {"one", 10}, {"two", 20}, {"three", 30} }, 3, &hash_func); + + std::unordered_map<std::string, int, hash_f<std::string>, equal> + c1(3, &hash_func); + c1 = c0; + std::unordered_multimap<std::string, int, hash_f<std::string>, equal> + c2(c0.begin(), c0.end(), 3, &xhash_func); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("one") == 2 ); + VERIFY( c2.count("two") == 2 ); + VERIFY( c2.count("three") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + int main() { test01(); test02(); test03(); + test04(); + test05(); + test06(); }
reply other threads:[~2023-10-19 17:07 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20231019170742.059273858D1E@sourceware.org \ --to=fdumont@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ --cc=libstdc++-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).