* [PATCH] Enhance unordered container merge
@ 2021-11-14 13:29 François Dumont
2021-11-14 23:25 ` Jonathan Wakely
0 siblings, 1 reply; 4+ messages in thread
From: François Dumont @ 2021-11-14 13:29 UTC (permalink / raw)
To: libstdc++; +Cc: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 910 bytes --]
libstdc++: Unordered containers merge re-use hash code.
When merging between 2 unordered containers with same hasher we can
re-use
the cached hash code if any.
Use previous insert iterator as a hint for the next insert in case
of multi container.
* include/bits/hashtable_policy.h
(_Hash_code_base<>::_ReuseOrComputeHash<>): New.
(_Hash_code_base<>::_M_hash_code<_H2>(const _H2&, const
_Hash_node_value<>&)): New.
* include/bits/hashtable.h (_Hashtable<>::_M_merge_unique):
Use latter.
(_Hashtable<>::_M_merge_multi): Likewise.
*
testsuite/23_containers/unordered_multiset/modifiers/merge.cc (test05):
New test.
* testsuite/23_containers/unordered_set/modifiers/merge.cc
(test04): New test.
Tested under Linux x86_64.
Ok to commit ?
François
[-- Attachment #2: hashtable_merge.patch --]
[-- Type: text/x-patch, Size: 5615 bytes --]
diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 0e949d73614..6e2d4c10cfe 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __pos = __i++;
const key_type& __k = _ExtractKey{}(*__pos);
- __hash_code __code = this->_M_hash_code(__k);
+ __hash_code __code
+ = this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
size_type __bkt = _M_bucket_index(__code);
if (_M_find_node(__bkt, __k, __code) == nullptr)
{
@@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
node_type>, "Node types are compatible");
__glibcxx_assert(get_allocator() == __src.get_allocator());
+ __node_ptr __hint = nullptr;
this->reserve(size() + __src.size());
for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
{
auto __pos = __i++;
- const key_type& __k = _ExtractKey{}(*__pos);
- __hash_code __code = this->_M_hash_code(__k);
+ __hash_code __code
+ = this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
auto __nh = __src.extract(__pos);
- _M_insert_multi_node(nullptr, __code, __nh._M_ptr);
+ __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur;
__nh._M_ptr = nullptr;
}
}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index c0295b75963..95a1c45e634 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1217,6 +1217,26 @@ namespace __detail
friend struct _Local_iterator_base<_Key, _Value, _ExtractKey,
_Hash, _RangeHash, _Unused, false>;
+ template<typename _H1, typename _H2, bool __with_cache>
+ struct _ReuseOrComputeHash
+ {
+ std::size_t
+ operator()(const _Hash_node_value<_Value, __with_cache>& __n) const
+ { return _M_hash_code_base._M_hash_code(_ExtractKey{}(__n._M_v())); }
+
+ const _Hash_code_base& _M_hash_code_base;
+ };
+
+ template<typename _Hn>
+ struct _ReuseOrComputeHash<_Hn, _Hn, true>
+ {
+ _ReuseOrComputeHash(const _Hash_code_base&) { }
+
+ std::size_t
+ operator()(const _Hash_node_value<_Value, true>& __n) const
+ { return __n._M_hash_code; }
+ };
+
public:
typedef _Hash hasher;
@@ -1250,6 +1270,12 @@ namespace __detail
return _M_hash()(__k);
}
+ template<typename _H2>
+ __hash_code
+ _M_hash_code(const _H2&,
+ const _Hash_node_value<_Value, __cache_hash_code>& __n) const
+ { return _ReuseOrComputeHash<_Hash, _H2, __cache_hash_code>{ *this }(__n); }
+
std::size_t
_M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
{ return _RangeHash{}(__c, __bkt_count); }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
index 1ed2ce234a1..07b8a344169 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
@@ -17,6 +17,7 @@
// { dg-do run { target c++17 } }
+#include <string>
#include <unordered_set>
#include <algorithm>
#include <testsuite_hooks.h>
@@ -105,6 +106,26 @@ test04()
VERIFY( c2.empty() );
}
+void
+test05()
+{
+ const std::unordered_multiset<std::string> c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" };
+ std::unordered_multiset<std::string> c1 = c0;
+ std::unordered_set<std::string> c2( c0.begin(), c0.end() );
+
+ c1.merge(c2);
+ VERIFY( c1.size() == (1.5 * c0.size()) );
+ for (auto& i : c1)
+ VERIFY( c1.count(i) == (1.5 * c0.count(i)) );
+ VERIFY( c2.empty() );
+
+ c1.clear();
+ c2.insert( c0.begin(), c0.end() );
+ c1.merge(std::move(c2));
+ VERIFY( c1.size() == (0.5 * c0.size()) );
+ VERIFY( c2.empty() );
+}
+
int
main()
{
@@ -112,4 +133,5 @@ main()
test02();
test03();
test04();
+ test05();
}
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
index c9c8a60fd54..0e184b10c60 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
@@ -17,6 +17,7 @@
// { dg-do run { target c++17 } }
+#include <string>
#include <unordered_set>
#include <algorithm>
#include <testsuite_hooks.h>
@@ -125,10 +126,52 @@ test03()
VERIFY( c2.empty() );
}
+void
+test04()
+{
+ const std::unordered_set<std::string> c0{ "abcd", "efgh", "ijkl", };
+ std::unordered_set<std::string> c1 = c0;
+ std::unordered_multiset<std::string> c2( c0.begin(), c0.end() );
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.clear();
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( c2.empty() );
+
+ c2.merge(c1);
+ VERIFY( c1.empty() );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1 = c0;
+ c2.merge(c1);
+ VERIFY( c1.empty() );
+ VERIFY( c2.size() == (2 * c0.size()) );
+ VERIFY( c2.count("abcd") == 2 );
+ VERIFY( c2.count("efgh") == 2 );
+ VERIFY( c2.count("ijkl") == 2 );
+
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.merge(std::move(c2));
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.clear();
+ c1.merge(std::move(c2));
+ VERIFY( c1 == c0 );
+ VERIFY( c2.empty() );
+}
+
int
main()
{
test01();
test02();
test03();
+ test04();
}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Enhance unordered container merge
2021-11-14 13:29 [PATCH] Enhance unordered container merge François Dumont
@ 2021-11-14 23:25 ` Jonathan Wakely
2021-11-15 6:00 ` François Dumont
0 siblings, 1 reply; 4+ messages in thread
From: Jonathan Wakely @ 2021-11-14 23:25 UTC (permalink / raw)
To: François Dumont; +Cc: libstdc++, gcc-patches
On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++
<libstdc++@gcc.gnu.org> wrote:
>
> libstdc++: Unordered containers merge re-use hash code.
>
> When merging between 2 unordered containers with same hasher we can
> re-use
> the cached hash code if any.
Instead of introducing the _ReuseOrComputeHash type, wouldn't it be
simpler to just overload _M_hash_code?
// Same hash function, use the cached hash code.
__hash_code
_M_hash_code(const _Hash&,
const _Hash_node_value<_Value, true>& __n) const
{ return __n._M_hash_code; }
// Compute hash code using a different hash function, _H2
template<typename _H2>
__hash_code
_M_hash_code(const _H2&,
const _Hash_node_value<_Value, __cache_hash_code>& __n) const
{ return this->_M_hash_code(_ExtractKey{}(__n._M_v()); }
The first overload is more specialized, so will be chosen when the
first argument is the same type as _Hash and the cache_has_code
boolean is true.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Enhance unordered container merge
2021-11-14 23:25 ` Jonathan Wakely
@ 2021-11-15 6:00 ` François Dumont
2021-11-15 9:10 ` Jonathan Wakely
0 siblings, 1 reply; 4+ messages in thread
From: François Dumont @ 2021-11-15 6:00 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1137 bytes --]
On 15/11/21 12:25 am, Jonathan Wakely wrote:
> On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++
> <libstdc++@gcc.gnu.org> wrote:
>> libstdc++: Unordered containers merge re-use hash code.
>>
>> When merging between 2 unordered containers with same hasher we can
>> re-use
>> the cached hash code if any.
> Instead of introducing the _ReuseOrComputeHash type, wouldn't it be
> simpler to just overload _M_hash_code?
>
>
> // Same hash function, use the cached hash code.
> __hash_code
> _M_hash_code(const _Hash&,
> const _Hash_node_value<_Value, true>& __n) const
> { return __n._M_hash_code; }
>
> // Compute hash code using a different hash function, _H2
> template<typename _H2>
> __hash_code
> _M_hash_code(const _H2&,
> const _Hash_node_value<_Value, __cache_hash_code>& __n) const
> { return this->_M_hash_code(_ExtractKey{}(__n._M_v()); }
>
> The first overload is more specialized, so will be chosen when the
> first argument is the same type as _Hash and the cache_has_code
> boolean is true.
Yes, it is simpler.
Ok to commit ?
François
[-- Attachment #2: hashtable_merge.patch --]
[-- Type: text/x-patch, Size: 5074 bytes --]
diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 0e949d73614..6e2d4c10cfe 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __pos = __i++;
const key_type& __k = _ExtractKey{}(*__pos);
- __hash_code __code = this->_M_hash_code(__k);
+ __hash_code __code
+ = this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
size_type __bkt = _M_bucket_index(__code);
if (_M_find_node(__bkt, __k, __code) == nullptr)
{
@@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
node_type>, "Node types are compatible");
__glibcxx_assert(get_allocator() == __src.get_allocator());
+ __node_ptr __hint = nullptr;
this->reserve(size() + __src.size());
for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
{
auto __pos = __i++;
- const key_type& __k = _ExtractKey{}(*__pos);
- __hash_code __code = this->_M_hash_code(__k);
+ __hash_code __code
+ = this->_M_hash_code(__src.hash_function(), *__pos._M_cur);
auto __nh = __src.extract(__pos);
- _M_insert_multi_node(nullptr, __code, __nh._M_ptr);
+ __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur;
__nh._M_ptr = nullptr;
}
}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index c0295b75963..0b5443fc187 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -1250,6 +1250,19 @@ namespace __detail
return _M_hash()(__k);
}
+ __hash_code
+ _M_hash_code(const _Hash&,
+ const _Hash_node_value<_Value, true>& __n) const
+ { return __n._M_hash_code; }
+
+ // Compute hash code using _Hash as __n _M_hash_code, if present, was
+ // computed using _H2.
+ template<typename _H2>
+ __hash_code
+ _M_hash_code(const _H2&,
+ const _Hash_node_value<_Value, __cache_hash_code>& __n) const
+ { return _M_hash_code(_ExtractKey{}(__n._M_v())); }
+
std::size_t
_M_bucket_index(__hash_code __c, std::size_t __bkt_count) const
{ return _RangeHash{}(__c, __bkt_count); }
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
index 1ed2ce234a1..07b8a344169 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc
@@ -17,6 +17,7 @@
// { dg-do run { target c++17 } }
+#include <string>
#include <unordered_set>
#include <algorithm>
#include <testsuite_hooks.h>
@@ -105,6 +106,26 @@ test04()
VERIFY( c2.empty() );
}
+void
+test05()
+{
+ const std::unordered_multiset<std::string> c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" };
+ std::unordered_multiset<std::string> c1 = c0;
+ std::unordered_set<std::string> c2( c0.begin(), c0.end() );
+
+ c1.merge(c2);
+ VERIFY( c1.size() == (1.5 * c0.size()) );
+ for (auto& i : c1)
+ VERIFY( c1.count(i) == (1.5 * c0.count(i)) );
+ VERIFY( c2.empty() );
+
+ c1.clear();
+ c2.insert( c0.begin(), c0.end() );
+ c1.merge(std::move(c2));
+ VERIFY( c1.size() == (0.5 * c0.size()) );
+ VERIFY( c2.empty() );
+}
+
int
main()
{
@@ -112,4 +133,5 @@ main()
test02();
test03();
test04();
+ test05();
}
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
index c9c8a60fd54..0e184b10c60 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc
@@ -17,6 +17,7 @@
// { dg-do run { target c++17 } }
+#include <string>
#include <unordered_set>
#include <algorithm>
#include <testsuite_hooks.h>
@@ -125,10 +126,52 @@ test03()
VERIFY( c2.empty() );
}
+void
+test04()
+{
+ const std::unordered_set<std::string> c0{ "abcd", "efgh", "ijkl", };
+ std::unordered_set<std::string> c1 = c0;
+ std::unordered_multiset<std::string> c2( c0.begin(), c0.end() );
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.clear();
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( c2.empty() );
+
+ c2.merge(c1);
+ VERIFY( c1.empty() );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1 = c0;
+ c2.merge(c1);
+ VERIFY( c1.empty() );
+ VERIFY( c2.size() == (2 * c0.size()) );
+ VERIFY( c2.count("abcd") == 2 );
+ VERIFY( c2.count("efgh") == 2 );
+ VERIFY( c2.count("ijkl") == 2 );
+
+ c1.merge(c2);
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.merge(std::move(c2));
+ VERIFY( c1 == c0 );
+ VERIFY( equal_elements(c2, c0) );
+
+ c1.clear();
+ c1.merge(std::move(c2));
+ VERIFY( c1 == c0 );
+ VERIFY( c2.empty() );
+}
+
int
main()
{
test01();
test02();
test03();
+ test04();
}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] Enhance unordered container merge
2021-11-15 6:00 ` François Dumont
@ 2021-11-15 9:10 ` Jonathan Wakely
0 siblings, 0 replies; 4+ messages in thread
From: Jonathan Wakely @ 2021-11-15 9:10 UTC (permalink / raw)
To: François Dumont; +Cc: libstdc++, gcc-patches
On Mon, 15 Nov 2021 at 06:00, François Dumont wrote:
>
> On 15/11/21 12:25 am, Jonathan Wakely wrote:
> > On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++
> > <libstdc++@gcc.gnu.org> wrote:
> >> libstdc++: Unordered containers merge re-use hash code.
> >>
> >> When merging between 2 unordered containers with same hasher we can
> >> re-use
> >> the cached hash code if any.
> > Instead of introducing the _ReuseOrComputeHash type, wouldn't it be
> > simpler to just overload _M_hash_code?
> >
> >
> > // Same hash function, use the cached hash code.
> > __hash_code
> > _M_hash_code(const _Hash&,
> > const _Hash_node_value<_Value, true>& __n) const
> > { return __n._M_hash_code; }
> >
> > // Compute hash code using a different hash function, _H2
> > template<typename _H2>
> > __hash_code
> > _M_hash_code(const _H2&,
> > const _Hash_node_value<_Value, __cache_hash_code>& __n) const
> > { return this->_M_hash_code(_ExtractKey{}(__n._M_v()); }
> >
> > The first overload is more specialized, so will be chosen when the
> > first argument is the same type as _Hash and the cache_has_code
> > boolean is true.
>
> Yes, it is simpler.
>
> Ok to commit ?
Yes, thanks.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-11-15 9:11 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-14 13:29 [PATCH] Enhance unordered container merge François Dumont
2021-11-14 23:25 ` Jonathan Wakely
2021-11-15 6:00 ` François Dumont
2021-11-15 9:10 ` Jonathan Wakely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).