diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 92516b81ae5..fedb5a7ec3d 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -426,6 +426,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_uses_single_bucket() const { return _M_uses_single_bucket(_M_buckets); } + static constexpr size_t + __small_size_threshold() + { + return + __detail::_Hashtable_hash_traits<_Hash>::__small_size_threshold(); + } + __hashtable_alloc& _M_base_alloc() { return *this; } @@ -795,6 +802,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_bucket_index(__hash_code __c) const { return __hash_code_base::_M_bucket_index(__c, _M_bucket_count); } + __node_base_ptr + _M_find_before_node(const key_type&); + // Find and insert helper functions and types // Find the node before the one matching the criteria. __node_base_ptr @@ -838,6 +848,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __node_base_ptr _M_get_previous_node(size_type __bkt, __node_ptr __n); + pair + _M_compute_hash_code(const_iterator __hint, const key_type& __k) const; + // Insert node __n with hash code __code, in bucket __bkt if no // rehash (assumes no element with same key already present). // Takes ownership of __n if insertion succeeds, throws otherwise. @@ -1124,7 +1137,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_rehash(size_type __bkt_count, const __rehash_state& __state); }; - // Definitions of class template _Hashtable's out-of-line member functions. template iterator { + if (size() <= __small_size_threshold()) + { + for (auto __it = begin(); __it != end(); ++__it) + if (this->_M_key_equals(__k, *__it._M_cur)) + return __it; + return end(); + } + __hash_code __code = this->_M_hash_code(__k); std::size_t __bkt = _M_bucket_index(__code); return iterator(_M_find_node(__bkt, __k, __code)); @@ -1632,6 +1652,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION find(const key_type& __k) const -> const_iterator { + if (size() <= __small_size_threshold()) + { + for (auto __it = begin(); __it != end(); ++__it) + if (this->_M_key_equals(__k, *__it._M_cur)) + return __it; + return end(); + } + __hash_code __code = this->_M_hash_code(__k); std::size_t __bkt = _M_bucket_index(__code); return const_iterator(_M_find_node(__bkt, __k, __code)); @@ -1844,6 +1872,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } #endif + // Find the node before the one whose key compares equal to k. + // Return nullptr if no node is found. + template + auto + _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, + _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: + _M_find_before_node(const key_type& __k) + -> __node_base_ptr + { + __node_base_ptr __prev_p = &_M_before_begin; + if (!__prev_p->_M_nxt) + return nullptr; + + for (__node_ptr __p = static_cast<__node_ptr>(__prev_p->_M_nxt); + __p != nullptr; + __p = __p->_M_next()) + { + if (this->_M_key_equals(__k, *__p)) + return __prev_p; + + __prev_p = __p; + } + + return nullptr; + } + // Find the node before the one whose key compares equal to k in the bucket // bkt. Return nullptr if no node is found. template(__args)... }; const key_type& __k = _ExtractKey{}(__node._M_node->_M_v()); + if (size() <= __small_size_threshold()) + { + for (auto __it = begin(); __it != end(); ++__it) + if (this->_M_key_equals(__k, *__it._M_cur)) + // There is already an equivalent node, no insertion + return { __it, false }; + } + __hash_code __code = this->_M_hash_code(__k); size_type __bkt = _M_bucket_index(__code); + if (size() > __small_size_threshold()) if (__node_ptr __p = _M_find_node(__bkt, __k, __code)) // There is already an equivalent node, no insertion - return std::make_pair(iterator(__p), false); + return { iterator(__p), false }; // Insert the node auto __pos = _M_insert_unique_node(__bkt, __code, __node._M_node); @@ -2020,13 +2086,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Scoped_node __node { this, std::forward<_Args>(__args)... }; const key_type& __k = _ExtractKey{}(__node._M_node->_M_v()); - __hash_code __code = this->_M_hash_code(__k); + auto __res = this->_M_compute_hash_code(__hint, __k); auto __pos - = _M_insert_multi_node(__hint._M_cur, __code, __node._M_node); + = _M_insert_multi_node(__res.first._M_cur, __res.second, + __node._M_node); __node._M_node = nullptr; return __pos; } + template + auto + _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, + _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: + _M_compute_hash_code(const_iterator __hint, const key_type& __k) const + -> pair + { + if (size() <= __small_size_threshold()) + { + if (__hint != cend()) + { + for (auto __it = __hint; __it != cend(); ++__it) + if (this->_M_key_equals(__k, *__it._M_cur)) + return { __it, this->_M_hash_code(*__it._M_cur) }; + } + + for (auto __it = cbegin(); __it != __hint; ++__it) + if (this->_M_key_equals(__k, *__it._M_cur)) + return { __it, this->_M_hash_code(*__it._M_cur) }; + } + + return { __hint, this->_M_hash_code(__k) }; + } + template pair { + if (size() <= __small_size_threshold()) + for (auto __it = begin(); __it != end(); ++__it) + if (this->_M_key_equals_tr(__k, *__it._M_cur)) + return { __it, false }; + __hash_code __code = this->_M_hash_code_tr(__k); size_type __bkt = _M_bucket_index(__code); + if (size() > __small_size_threshold()) if (__node_ptr __node = _M_find_node_tr(__bkt, __k, __code)) return { iterator(__node), false }; @@ -2161,11 +2261,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Scoped_node __node{ __node_gen(std::forward<_Arg>(__v)), this }; // Second compute the hash code so that we don't rehash if it throws. - __hash_code __code - = this->_M_hash_code(_ExtractKey{}(__node._M_node->_M_v())); + auto __res = this->_M_compute_hash_code( + __hint, _ExtractKey{}(__node._M_node->_M_v())); auto __pos - = _M_insert_multi_node(__hint._M_cur, __code, __node._M_node); + = _M_insert_multi_node(__res.first._M_cur, __res.second, + __node._M_node); __node._M_node = nullptr; return __pos; } @@ -2227,17 +2328,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: _M_erase(true_type /* __uks */, const key_type& __k) -> size_type + { + __node_base_ptr __prev_n; + __node_ptr __n; + std::size_t __bkt; + if (size() <= __small_size_threshold()) + { + __prev_n = _M_find_before_node(__k); + if (!__prev_n) + return 0; + + // We found a matching node, erase it. + __n = static_cast<__node_ptr>(__prev_n->_M_nxt); + __bkt = _M_bucket_index(*__n); + } + else { __hash_code __code = this->_M_hash_code(__k); - std::size_t __bkt = _M_bucket_index(__code); + __bkt = _M_bucket_index(__code); // Look for the node before the first matching node. - __node_base_ptr __prev_n = _M_find_before_node(__bkt, __k, __code); + __prev_n = _M_find_before_node(__bkt, __k, __code); if (!__prev_n) return 0; // We found a matching node, erase it. - __node_ptr __n = static_cast<__node_ptr>(__prev_n->_M_nxt); + __n = static_cast<__node_ptr>(__prev_n->_M_nxt); + } + _M_erase(__bkt, __prev_n, __n); return 1; } @@ -2251,22 +2369,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>:: _M_erase(false_type /* __uks */, const key_type& __k) -> size_type + { + std::size_t __bkt; + __node_base_ptr __prev_n; + __node_ptr __n; + if (size() <= __small_size_threshold()) + { + __prev_n = _M_find_before_node(__k); + if (!__prev_n) + return 0; + + // We found a matching node, erase it. + __n = static_cast<__node_ptr>(__prev_n->_M_nxt); + __bkt = _M_bucket_index(*__n); + } + else { __hash_code __code = this->_M_hash_code(__k); - std::size_t __bkt = _M_bucket_index(__code); + __bkt = _M_bucket_index(__code); // Look for the node before the first matching node. - __node_base_ptr __prev_n = _M_find_before_node(__bkt, __k, __code); + __prev_n = _M_find_before_node(__bkt, __k, __code); if (!__prev_n) return 0; + __n = static_cast<__node_ptr>(__prev_n->_M_nxt); + } + // _GLIBCXX_RESOLVE_LIB_DEFECTS // 526. Is it undefined if a function in the standard changes // in parameters? // We use one loop to find all matching nodes and another to deallocate // them so that the key stays valid during the first loop. It might be // invalidated indirectly when destroying nodes. - __node_ptr __n = static_cast<__node_ptr>(__prev_n->_M_nxt); __node_ptr __n_last = __n->_M_next(); while (__n_last && this->_M_node_equals(*__n, *__n_last)) __n_last = __n_last->_M_next(); diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 2130c958262..93d1962c74f 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -59,19 +59,19 @@ namespace __detail // Helper function: return distance(first, last) for forward // iterators, or 0/1 for input iterators. - template + template inline typename std::iterator_traits<_Iterator>::difference_type __distance_fw(_Iterator __first, _Iterator __last, std::input_iterator_tag) { return __first != __last ? 1 : 0; } - template + template inline typename std::iterator_traits<_Iterator>::difference_type __distance_fw(_Iterator __first, _Iterator __last, std::forward_iterator_tag) { return std::distance(__first, __last); } - template + template inline typename std::iterator_traits<_Iterator>::difference_type __distance_fw(_Iterator __first, _Iterator __last) { return __distance_fw(__first, __last, @@ -242,6 +242,20 @@ namespace __detail using __unique_keys = __bool_constant<_Unique_keys>; }; + /** + * struct _Hashtable_hash_traits + * + * Important traits for hash tables depending on associated hasher. + * + */ + template + struct _Hashtable_hash_traits + { + static constexpr std::size_t + __small_size_threshold() + { return std::__is_fast_hash<_Hash>::value ? 0 : 20; } + }; + /** * struct _Hash_node_base * @@ -1121,10 +1135,12 @@ namespace __detail _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits, true_type /* Has load factor */> { + private: using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal, _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>; + public: float max_load_factor() const noexcept { @@ -1266,6 +1282,14 @@ namespace __detail return _M_hash()(__k); } + __hash_code + _M_hash_code(const _Hash_node_value<_Value, false>& __n) const + { return _M_hash_code(_ExtractKey{}(__n._M_v())); } + + __hash_code + _M_hash_code(const _Hash_node_value<_Value, true>& __n) const + { return __n._M_hash_code; } + std::size_t _M_bucket_index(__hash_code __c, std::size_t __bkt_count) const { return _RangeHash{}(__c, __bkt_count); } @@ -1276,17 +1300,14 @@ namespace __detail noexcept( noexcept(declval()(declval())) && noexcept(declval()((__hash_code)0, (std::size_t)0)) ) - { - return _RangeHash{}(_M_hash_code(_ExtractKey{}(__n._M_v())), - __bkt_count); - } + { return _M_bucket_index(_M_hash_code(__n), __bkt_count); } std::size_t _M_bucket_index(const _Hash_node_value<_Value, true>& __n, std::size_t __bkt_count) const noexcept( noexcept(declval()((__hash_code)0, (std::size_t)0)) ) - { return _RangeHash{}(__n._M_hash_code, __bkt_count); } + { return _M_bucket_index(__n._M_hash_code, __bkt_count); } void _M_store_code(_Hash_node_code_cache&, __hash_code) const @@ -1646,18 +1667,20 @@ namespace __detail { } bool - _M_equals(const _Key& __k, __hash_code __c, - const _Hash_node_value<_Value, __hash_cached::value>& __n) const + _M_key_equals(const _Key& __k, + const _Hash_node_value<_Value, + __hash_cached::value>& __n) const { - static_assert(__is_invocable{}, + static_assert( + __is_invocable{}, "key equality predicate must be invocable with two arguments of " "key type"); - return _S_equals(__c, __n) && _M_eq()(__k, _ExtractKey{}(__n._M_v())); + return _M_eq()(__k, _ExtractKey{}(__n._M_v())); } template bool - _M_equals_tr(const _Kt& __k, __hash_code __c, + _M_key_equals_tr(const _Kt& __k, const _Hash_node_value<_Value, __hash_cached::value>& __n) const { @@ -1665,16 +1688,28 @@ namespace __detail __is_invocable{}, "key equality predicate must be invocable with two arguments of " "key type"); - return _S_equals(__c, __n) && _M_eq()(__k, _ExtractKey{}(__n._M_v())); + return _M_eq()(__k, _ExtractKey{}(__n._M_v())); } + bool + _M_equals(const _Key& __k, __hash_code __c, + const _Hash_node_value<_Value, __hash_cached::value>& __n) const + { return _S_equals(__c, __n) && _M_key_equals(__k, __n); } + + template + bool + _M_equals_tr(const _Kt& __k, __hash_code __c, + const _Hash_node_value<_Value, + __hash_cached::value>& __n) const + { return _S_equals(__c, __n) && _M_key_equals_tr(__k, __n); } + bool _M_node_equals( const _Hash_node_value<_Value, __hash_cached::value>& __lhn, const _Hash_node_value<_Value, __hash_cached::value>& __rhn) const { return _S_node_equals(__lhn, __rhn) - && _M_eq()(_ExtractKey{}(__lhn._M_v()), _ExtractKey{}(__rhn._M_v())); + && _M_key_equals(_ExtractKey{}(__lhn._M_v()), __rhn); } void diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc index b9dec61b2ea..116192dfcf1 100644 --- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc +++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include namespace std _GLIBCXX_VISIBILITY(default) diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc new file mode 100644 index 00000000000..ae63c15b5da --- /dev/null +++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc @@ -0,0 +1,125 @@ +// { dg-do run { target c++11 } } + +// Copyright (C) 2021 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +#include +#include +#include +#include +#include + +namespace +{ + const int nb_elements = 20; + const int nb_insts = 150000; + + template + void bench(const char* desc, const std::vector<_ElemType>& elems) + { + using namespace __gnu_test; + + time_counter time; + resource_counter resource; + + std::vector> insts(nb_insts); + for (int j = 0; j != nb_insts; ++j) + insts.emplace_back(); + + start_counters(time, resource); + + for (auto& us : insts) + for (int i = 0; i != nb_elements; ++i) + us.insert(elems[i]); + + stop_counters(time, resource); + + std::ostringstream ostr; + ostr << desc << " 1st insert"; + report_performance(__FILE__, ostr.str().c_str(), time, resource); + + start_counters(time, resource); + + for (auto& us : insts) + for (int i = nb_elements - 1; i >= 0; --i) + { + auto it = us.find(elems[i]); + if (it != us.end()) + us.erase(it); + } + + stop_counters(time, resource); + + ostr.str(""); + ostr << desc << " find/erase"; + report_performance(__FILE__, ostr.str().c_str(), time, resource); + + start_counters(time, resource); + + for (auto& us : insts) + { + us.insert(elems[0]); + for (int i = nb_elements - 1; i >= 0; --i) + us.insert(elems[i]); + } + + stop_counters(time, resource); + ostr.str(""); + ostr << desc << " 2nd insert"; + report_performance(__FILE__, ostr.str().c_str(), time, resource); + + start_counters(time, resource); + + for (auto& us : insts) + for (int j = nb_elements - 1; j >= 0; --j) + us.erase(elems[j]); + + stop_counters(time, resource); + ostr.str(""); + ostr << desc << " erase key "; + report_performance(__FILE__, ostr.str().c_str(), time, resource); + } +} + +int main() +{ + { + std::vector elems; + elems.reserve(nb_elements); + for (int i = 0; i != nb_elements; ++i) + elems.push_back(i); + + bench("std::unordered_set: ", elems); + } + + { + std::vector elems; + { + elems.reserve(nb_elements); + for (int i = 0; i != nb_elements; ++i) + { + std::ostringstream ostr; + ostr << "string #" << i << ' ' << std::string(1000, 'a' + i); + elems.push_back(ostr.str()); + } + } + + bench("std::unordered_set: ", elems); + } + + return 0; +} diff --git a/libstdc++-v3/testsuite/util/testsuite_performance.h b/libstdc++-v3/testsuite/util/testsuite_performance.h index cba3a0d4b17..4ca15ab0e71 100644 --- a/libstdc++-v3/testsuite/util/testsuite_performance.h +++ b/libstdc++-v3/testsuite/util/testsuite_performance.h @@ -239,7 +239,7 @@ namespace __gnu_test out << std::setw(4) << t.real_time() << "r" << space; out << std::setw(4) << t.user_time() << "u" << space; out << std::setw(4) << t.system_time() << "s" << space; - out << std::setw(8) << r.allocated_memory() << "mem" << space; + out << std::setw(9) << r.allocated_memory() << "mem" << space; out << std::setw(4) << r.hard_page_fault() << "pf" << space; out << std::endl;