* Re: [PATCH] Enhance _Hashtable for range insertion 0/5
2022-06-21 17:12 ` Jonathan Wakely
@ 2022-06-26 20:06 ` François Dumont
2022-06-27 16:18 ` Jonathan Wakely
0 siblings, 1 reply; 4+ messages in thread
From: François Dumont @ 2022-06-26 20:06 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: libstdc++, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 1730 bytes --]
I knew you were going to ask for it but was to impatient to propose
those patches to wait anymore.
Attached you'll find what I start to work on. But I am quite
disappointed by the results. At least it's showing that results are not
worst.
To be honest I was also hoping some feedback from potential users
interesting in testing those patches. And maybe there are some well
known (and free) benches that I could challenge ?
François
On 21/06/22 19:12, Jonathan Wakely wrote:
> On Mon, 20 Jun 2022 at 17:58, François Dumont via Libstdc++
> <libstdc++@gcc.gnu.org> wrote:
>> Hi
>>
>> Here is a series of patch to enhance _Hashtable behavior mostly in the
>> context of range insertion. I also start considering the problem of
>> memory fragmentation in this container with 2 objectives:
>>
>> - It is easier to find out when you're done with the elements of a
>> bucket if the last node of the bucket N is the before-begin node of
>> bucket N + 1.
>>
>> - It is faster to loop through nodes of a bucket if those node are close
>> in memory, ultimately we should have addressof(Node + 1) ==
>> addressof(Node) + 1
> Have these changes been profiled or benchmarked? Is it measurably
> faster? By how much?
>
>
>> [1/5] Make more use of user hints as both insertion and allocation hints.
>>
>> [2/5] Introduce a new method to check if we are still looping through
>> the same bucket's nodes
>>
>> [3/5] Consider that all initializer_list elements are going to be inserted
>>
>> [4/5] Introduce a before-begin cache policy to remember which bucket is
>> currently pointing on it
>>
>> [5/5] Prealloc nodes on _Hashtable copy and introduce a new assignment
>> method which replicate buckets data structure
>>
>> François
>>
[-- Attachment #2: bench.patch --]
[-- Type: text/x-patch, Size: 9007 bytes --]
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
index 8aa0cd20193..bb5778a257c 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
@@ -17,12 +17,14 @@
// { dg-do run { target c++11 } }
-#include <testsuite_performance.h>
+#include <string>
#include <random>
#include <sstream>
#include <tr1/unordered_set>
#include <unordered_set>
+#include <testsuite_performance.h>
+
#define USE_MY_FOO 1
struct Foo
@@ -71,10 +73,13 @@ struct HashFunction
};
const int sz = 300000;
+const int usz = sz / 2;
template<typename _ContType>
void
- bench(const char* container_desc, const typename _ContType::value_type* foos)
+ bench(const char* container_desc,
+ const typename _ContType::value_type* foos,
+ const typename _ContType::value_type* ufoos)
{
using namespace __gnu_test;
@@ -106,6 +111,51 @@ template<typename _ContType>
ostr << container_desc << nb_loop << " times insertion of "
<< sz << " elements";
report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+ // Try to lookup for mostly unknown entries.
+ start_counters(time, resource);
+
+ int fcount = 0;
+ for (int j = 0; j != nb_loop; ++j)
+ for (int i = 0; i != usz; ++i)
+ fcount += s.find(ufoos[i]) != s.end() ? 1 : 0;
+
+ stop_counters(time, resource);
+ ostr.str("");
+ ostr << container_desc << nb_loop << " times lookup of "
+ << usz << " elements " << fcount / nb_loop << " found";
+ report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+ // Try again the previous operations but on a copy with potentially
+ // less memory fragmentation.
+ _ContType scopy(s);
+
+ // Try to insert again to check performance of collision detection
+ start_counters(time, resource);
+
+ for (int j = 0; j != nb_loop; ++j)
+ for (int i = 0; i != sz; ++i)
+ scopy.insert(foos[i]);
+
+ stop_counters(time, resource);
+ ostr.str("");
+ ostr << container_desc << nb_loop << " times insertion of "
+ << sz << " elements in copy";
+ report_performance(__FILE__, ostr.str().c_str(), time, resource);
+
+ // Try to lookup for mostly unknown entries.
+ start_counters(time, resource);
+
+ fcount = 0;
+ for (int j = 0; j != nb_loop; ++j)
+ for (int i = 0; i != usz; ++i)
+ fcount += scopy.find(ufoos[i]) != scopy.end() ? 1 : 0;
+
+ stop_counters(time, resource);
+ ostr.str("");
+ ostr << container_desc << nb_loop << " times lookup of "
+ << usz << " elements " << fcount / nb_loop << " found";
+ report_performance(__FILE__, ostr.str().c_str(), time, resource);
}
template<bool cache>
@@ -155,67 +205,78 @@ int main()
{
int bars[sz];
+ int ubars[usz];
for (int i = 0; i != sz; ++i)
bars[i] = i;
+ for (int i = 0; i != usz; ++i)
+ ubars[i] = sz + i;
bench<std::tr1::unordered_set<int>>(
- "std::tr1::unordered_set<int> ", bars);
+ "std::tr1::unordered_set<int> ", bars, ubars);
bench<std::unordered_set<int>>(
- "std::unordered_set<int> ", bars);
+ "std::unordered_set<int> ", bars, ubars);
}
- Foo foos[sz];
-#if USE_MY_FOO
{
- std::random_device randev;
- for (int i = 0; i != sz; ++i)
- foos[i].init(randev);
- }
+ Foo foos[sz];
+ Foo ufoos[usz];
+#if USE_MY_FOO
+ {
+ std::random_device randev;
+ for (int i = 0; i != sz; ++i)
+ foos[i].init(randev);
+ for (int i = 0; i != usz; ++i)
+ ufoos[i].init(randev);
+ }
#endif
- time_counter time;
- resource_counter resource;
- start_counters(time, resource);
-
- bench<__tr1_uset<false>>(
- "std::tr1::unordered_set without hash code cached ", foos);
- bench<__tr1_uset<true>>(
- "std::tr1::unordered_set with hash code cached ", foos);
- bench<__tr1_umset<false>>(
- "std::tr1::unordered_multiset without hash code cached ", foos);
- bench<__tr1_umset<true>>(
- "std::tr1::unordered_multiset with hash code cached ", foos);
-
- stop_counters(time, resource);
- report_performance(__FILE__, "tr1 benches", time, resource);
-
- start_counters(time, resource);
- bench<__uset<false>>(
- "std::unordered_set without hash code cached ", foos);
- bench<__uset<true>>(
- "std::unordered_set with hash code cached ", foos);
- bench<__umset<false>>(
- "std::unordered_multiset without hash code cached ", foos);
- bench<__umset<true>>(
- "std::unordered_multiset with hash code cached ", foos);
-
- stop_counters(time, resource);
- report_performance(__FILE__, "std benches", time, resource);
-
- start_counters(time, resource);
- bench<__uset2<false>>(
- "std::unordered_set2 without hash code cached ", foos);
- bench<__uset2<true>>(
- "std::unordered_set2 with hash code cached ", foos);
- bench<__umset2<false>>(
- "std::unordered_multiset2 without hash code cached ", foos);
- bench<__umset2<true>>(
- "std::unordered_multiset2 with hash code cached ", foos);
-
- stop_counters(time, resource);
- report_performance(__FILE__, "std2 benches", time, resource);
-
- bench<std::unordered_set<Foo, HashFunction>>(
- "std::unordered_set default cache ", foos);
- bench<std::unordered_multiset<Foo, HashFunction>>(
- "std::unordered_multiset default cache ", foos);
+ time_counter time;
+ resource_counter resource;
+ start_counters(time, resource);
+
+ bench<__tr1_uset<false>>(
+ "std::tr1::unordered_set without hash code cached ", foos, ufoos);
+ bench<__tr1_uset<true>>(
+ "std::tr1::unordered_set with hash code cached ", foos, ufoos);
+ bench<__tr1_umset<false>>(
+ "std::tr1::unordered_multiset without hash code cached ", foos, ufoos);
+ bench<__tr1_umset<true>>(
+ "std::tr1::unordered_multiset with hash code cached ", foos, ufoos);
+
+ stop_counters(time, resource);
+ report_performance(__FILE__, "tr1 benches", time, resource);
+
+ start_counters(time, resource);
+ bench<__uset<false>>(
+ "std::unordered_set without hash code cached ", foos, ufoos);
+ bench<__uset<true>>(
+ "std::unordered_set with hash code cached ", foos, ufoos);
+ bench<__umset<false>>(
+ "std::unordered_multiset without hash code cached ", foos, ufoos);
+ bench<__umset<true>>(
+ "std::unordered_multiset with hash code cached ", foos, ufoos);
+
+ stop_counters(time, resource);
+ report_performance(__FILE__, "std benches", time, resource);
+
+ start_counters(time, resource);
+ bench<__uset2<false>>(
+ "std::unordered_set2 without hash code cached ", foos, ufoos);
+ bench<__uset2<true>>(
+ "std::unordered_set2 with hash code cached ", foos, ufoos);
+ bench<__umset2<false>>(
+ "std::unordered_multiset2 without hash code cached ", foos, ufoos);
+ bench<__umset2<true>>(
+ "std::unordered_multiset2 with hash code cached ", foos, ufoos);
+
+ stop_counters(time, resource);
+ report_performance(__FILE__, "std2 benches", time, resource);
+
+ bench<std::unordered_set<Foo, HashFunction>>(
+ "std::unordered_set default cache ", foos, ufoos);
+ bench<std::unordered_multiset<Foo, HashFunction>>(
+ "std::unordered_multiset default cache ", foos, ufoos);
+ }
+
+ {
+ }
}
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc
index ae63c15b5da..a23b20bf69d 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/unordered_small_size.cc
@@ -29,7 +29,7 @@ namespace
const int nb_insts = 150000;
template<typename _ElemType>
- void bench(const char* desc, const std::vector<_ElemType>& elems)
+ void bench(const char* desc, const std::vector<_ElemType>& elems, bool with_copy)
{
using namespace __gnu_test;
@@ -52,6 +52,19 @@ namespace
ostr << desc << " 1st insert";
report_performance(__FILE__, ostr.str().c_str(), time, resource);
+ if (with_copy)
+ {
+ start_counters(time, resource);
+
+ std::vector<std::unordered_set<_ElemType>>(insts).swap(insts);
+
+ stop_counters(time, resource);
+
+ ostr.str("");
+ ostr << desc << " copy";
+ report_performance(__FILE__, ostr.str().c_str(), time, resource);
+ }
+
start_counters(time, resource);
for (auto& us : insts)
@@ -103,7 +116,8 @@ int main()
for (int i = 0; i != nb_elements; ++i)
elems.push_back(i);
- bench("std::unordered_set<int>: ", elems);
+ bench("std::unordered_set<int>: ", elems, false);
+ bench("std::unordered_set<int>: ", elems, true);
}
{
@@ -118,7 +132,8 @@ int main()
}
}
- bench("std::unordered_set<string>: ", elems);
+ bench("std::unordered_set<string>: ", elems, false);
+ bench("std::unordered_set<string>: ", elems, true);
}
return 0;
^ permalink raw reply [flat|nested] 4+ messages in thread