public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* New power of 2 hash policy
@ 2015-09-10 20:11 François Dumont
  2015-09-11 13:18 ` Michael Matz
  0 siblings, 1 reply; 9+ messages in thread
From: François Dumont @ 2015-09-10 20:11 UTC (permalink / raw)
  To: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 589 bytes --]

Hi

    Here is a patch to offer an alternative hash policy. This one is
using power of 2 number of buckets allowing a faster modulo operation.
This is obvious when running the performance test that I have adapted to
use this alternative policy. Something between current implementation
and the tr1 one, the old std one.

    Of course with this hash policy the lower bits of the hash code are
more important. For pointers it would require to change the std::hash
implementation to remove the lower 0 bits like in the patch I proposed
some weeks ago.

    What do you think ?

François

[-- Attachment #2: hashtable.patch --]
[-- Type: text/x-patch, Size: 17494 bytes --]

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver
index d42cd37..97913a6 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -1870,6 +1870,11 @@ GLIBCXX_3.4.22 {
     # std::uncaught_exceptions()
     _ZSt19uncaught_exceptionsv;
 
+    extern "C++"
+    {
+      std::__detail::_Power2_rehash_policy::*;
+    };
+
 } GLIBCXX_3.4.21;
 
 # Symbols in the support library (libsupc++) have their own tag.
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index a9ad7dd..e774044 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// smallest prime that keeps the load factor small enough.
   struct _Prime_rehash_policy
   {
+    using __has_load_factor = std::true_type;
+
     _Prime_rehash_policy(float __z = 1.0) noexcept
     : _M_max_load_factor(__z), _M_next_resize(0) { }
 
@@ -501,6 +503,69 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     mutable std::size_t	_M_next_resize;
   };
 
+  /// Range hashing function considering that second args is a power of 2.
+  struct _Mask_range_hashing
+  {
+    typedef std::size_t first_argument_type;
+    typedef std::size_t second_argument_type;
+    typedef std::size_t result_type;
+
+    result_type
+    operator()(first_argument_type __num,
+	       second_argument_type __den) const noexcept
+    { return __num & (__den - 1); }
+  };
+
+  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
+  /// operations.
+  struct _Power2_rehash_policy
+  {
+    using __has_load_factor = std::true_type;
+
+    _Power2_rehash_policy(float __z = 1.0) noexcept
+    : _M_max_load_factor(__z), _M_next_resize(0) { }
+
+    float
+    max_load_factor() const noexcept
+    { return _M_max_load_factor; }
+
+    // Return a bucket size no smaller than n.
+    std::size_t
+    _M_next_bkt(std::size_t __n) const;
+
+    // Return a bucket count appropriate for n elements
+    std::size_t
+    _M_bkt_for_elements(std::size_t __n) const
+    { return __builtin_ceil(__n / (long double)_M_max_load_factor); }
+
+    // __n_bkt is current bucket count, __n_elt is current element count,
+    // and __n_ins is number of elements to be inserted.  Do we need to
+    // increase bucket count?  If so, return make_pair(true, n), where n
+    // is the new bucket count.  If not, return make_pair(false, 0).
+    std::pair<bool, std::size_t>
+    _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
+		   std::size_t __n_ins) const;
+
+    typedef std::size_t _State;
+
+    _State
+    _M_state() const
+    { return _M_next_resize; }
+
+    void
+    _M_reset() noexcept
+    { _M_next_resize = 0; }
+
+    void
+    _M_reset(_State __state)
+    { _M_next_resize = __state; }
+
+    static const std::size_t _S_growth_factor = 2;
+
+    float		_M_max_load_factor;
+    mutable std::size_t	_M_next_resize;
+  };
+
   // Base classes for std::_Hashtable.  We define these base classes
   // because in some cases we want to do different things depending on
   // the value of a policy class.  In some cases the policy class
@@ -775,8 +840,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits,
-	   bool _Constant_iterators = _Traits::__constant_iterators::value,
-	   bool _Unique_keys = _Traits::__unique_keys::value>
+	   bool _Constant_iterators = _Traits::__constant_iterators::value>
     struct _Insert;
 
   /// Specialization.
@@ -785,65 +849,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, true>
+		   _RehashPolicy, _Traits, true>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
       using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
 					_Equal, _H1, _H2, _Hash,
 					_RehashPolicy, _Traits>;
-      using value_type = typename __base_type::value_type;
-      using iterator = typename __base_type::iterator;
-      using const_iterator =  typename __base_type::const_iterator;
-
-      using __unique_keys = typename __base_type::__unique_keys;
-      using __hashtable = typename __base_type::__hashtable;
-      using __node_gen_type = typename __base_type::__node_gen_type;
-
-      using __base_type::insert;
 
-      std::pair<iterator, bool>
-      insert(value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(std::move(__v), __node_gen, __unique_keys());
-      }
-
-      iterator
-      insert(const_iterator __hint, value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(__hint, std::move(__v), __node_gen,
-			     __unique_keys());
-      }
-    };
+      using __hashtable_base = _Hashtable_base<_Key, _Value, _ExtractKey,
+					       _Equal, _H1, _H2, _Hash,
+					       _Traits>;
 
-  /// Specialization.
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
-    struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, false>
-    : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
-    {
-      using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
-					_Equal, _H1, _H2, _Hash,
-					_RehashPolicy, _Traits>;
       using value_type = typename __base_type::value_type;
       using iterator = typename __base_type::iterator;
       using const_iterator =  typename __base_type::const_iterator;
 
       using __unique_keys = typename __base_type::__unique_keys;
+      using __ireturn_type = typename __hashtable_base::__ireturn_type;
       using __hashtable = typename __base_type::__hashtable;
       using __node_gen_type = typename __base_type::__node_gen_type;
 
       using __base_type::insert;
 
-      iterator
+      __ireturn_type
       insert(value_type&& __v)
       {
 	__hashtable& __h = this->_M_conjure_hashtable();
@@ -865,9 +894,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits, bool _Unique_keys>
+	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, false, _Unique_keys>
+		   _RehashPolicy, _Traits, false>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
@@ -911,28 +940,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
    };
 
+  template<typename _Policy>
+    using __has_load_factor = typename _Policy::__has_load_factor;
+
   /**
    *  Primary class template  _Rehash_base.
    *
    *  Give hashtable the max_load_factor functions and reserve iff the
-   *  rehash policy is _Prime_rehash_policy.
+   *  rehash policy supports it.
   */
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
+	   typename _RehashPolicy, typename _Traits,
+	   typename =
+	     __detected_or_t<std::false_type, __has_load_factor, _RehashPolicy>>
     struct _Rehash_base;
 
-  /// Specialization.
+  /// Specialization when rehash policy doesn't provide load factor management.
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash, typename _Traits>
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
+    struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+		      _H1, _H2, _Hash, _RehashPolicy, _Traits,
+		      std::false_type>
+    {
+    };
+
+  /// Specialization when rehash policy provide load factor management.
+  template<typename _Key, typename _Value, typename _Alloc,
+	   typename _ExtractKey, typename _Equal,
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
     struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			_H1, _H2, _Hash, _Prime_rehash_policy, _Traits>
+			_H1, _H2, _Hash, _RehashPolicy, _Traits,
+			std::true_type>
     {
       using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey,
 				     _Equal, _H1, _H2, _Hash,
-				     _Prime_rehash_policy, _Traits>;
+				     _RehashPolicy, _Traits>;
 
       float
       max_load_factor() const noexcept
@@ -945,7 +992,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       max_load_factor(float __z)
       {
 	__hashtable* __this = static_cast<__hashtable*>(this);
-	__this->__rehash_policy(_Prime_rehash_policy(__z));
+	__this->__rehash_policy(_RehashPolicy(__z));
       }
 
       void
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index 69f999f..7b2fd76 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -32,6 +32,81 @@
 #include <ext/alloc_traits.h>
 #include <bits/hashtable_policy.h>
 
+namespace
+{
+  const unsigned long __power2_list[] = // 32 or 64
+  {
+    1ul << 0,
+    1ul << 1,
+    1ul << 2,
+    1ul << 3,
+    1ul << 4,
+    1ul << 5,
+    1ul << 6,
+    1ul << 7,
+    1ul << 8,
+    1ul << 9,
+    1ul << 10,
+    1ul << 11,
+    1ul << 12,
+    1ul << 13,
+    1ul << 14,
+    1ul << 15,
+    1ul << 16,
+    1ul << 17,
+    1ul << 18,
+    1ul << 19,
+    1ul << 20,
+    1ul << 21,
+    1ul << 22,
+    1ul << 23,
+    1ul << 24,
+    1ul << 25,
+    1ul << 26,
+    1ul << 27,
+    1ul << 28,
+    1ul << 29,
+    1ul << 30,
+    1ul << 31,
+#if __SIZEOF_LONG__ != 8
+    1ul << 32
+#else
+    1ul << 32,
+    1ul << 33,
+    1ul << 34,
+    1ul << 35,
+    1ul << 36,
+    1ul << 37,
+    1ul << 38,
+    1ul << 39,
+    1ul << 40,
+    1ul << 41,
+    1ul << 42,
+    1ul << 43,
+    1ul << 44,
+    1ul << 45,
+    1ul << 46,
+    1ul << 47,
+    1ul << 48,
+    1ul << 49,
+    1ul << 50,
+    1ul << 51,
+    1ul << 52,
+    1ul << 53,
+    1ul << 54,
+    1ul << 55,
+    1ul << 56,
+    1ul << 57,
+    1ul << 58,
+    1ul << 59,
+    1ul << 60,
+    1ul << 61,
+    1ul << 62,
+    1ul << 63
+#endif
+  };
+}
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 #include "../shared/hashtable-aux.cc"
@@ -96,6 +171,62 @@ namespace __detail
       return std::make_pair(false, 0);
   }
 
+  // Return a prime no smaller than n.
+  std::size_t
+  _Power2_rehash_policy::_M_next_bkt(std::size_t __n) const
+  {
+    // Optimize lookups involving the first elements of __prime_list.
+    // (useful to speed-up, eg, constructors)
+    static const unsigned char __fast_bkt[12]
+      = { 2, 2, 2, 4, 8, 8, 8, 8, 16, 16, 16, 16 };
+
+    if (__n <= 11)
+      {
+	_M_next_resize =
+	  __builtin_ceil(__fast_bkt[__n] * (long double)_M_max_load_factor);
+	return __fast_bkt[__n];
+      }
+
+    constexpr auto __n_power2
+      = sizeof(__power2_list) / sizeof(unsigned long) - 1;
+    const unsigned long* __next_bkt =
+      std::lower_bound(__power2_list + 4, __power2_list + __n_power2, __n);
+    _M_next_resize =
+      __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+    return *__next_bkt;
+  }
+
+  // Finds the smallest prime p such that alpha p > __n_elt + __n_ins.
+  // If p > __n_bkt, return make_pair(true, p); otherwise return
+  // make_pair(false, 0).  In principle this isn't very different from
+  // _M_bkt_for_elements.
+
+  // The only tricky part is that we're caching the element count at
+  // which we need to rehash, so we don't have to do a floating-point
+  // multiply for every insertion.
+
+  std::pair<bool, std::size_t>
+  _Power2_rehash_policy::
+  _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
+		 std::size_t __n_ins) const
+  {
+    if (__n_elt + __n_ins >= _M_next_resize)
+      {
+	long double __min_bkts = (__n_elt + __n_ins)
+				   / (long double)_M_max_load_factor;
+	if (__min_bkts >= __n_bkt)
+	  return std::make_pair(true,
+	    _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
+					      __n_bkt * _S_growth_factor)));
+
+	_M_next_resize
+	  = __builtin_floor(__n_bkt * (long double)_M_max_load_factor);
+	return std::make_pair(false, 0);
+      }
+    else
+      return std::make_pair(false, 0);
+  }
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __detail
 } // namespace std
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
index ef956a0..a07d552 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
@@ -127,7 +127,27 @@ template<bool cache>
   using __umset = std::__umset_hashtable<Foo, HashFunction,
 					 std::equal_to<Foo>,
 					 std::allocator<Foo>,
-					 std::__uset_traits<cache>>;
+					 std::__umset_traits<cache>>;
+
+template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
+  using __umset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__umset_traits<cache>>;
 
 int main()
 {
@@ -181,6 +201,19 @@ int main()
   stop_counters(time, resource);
   report_performance(__FILE__, "std benches", time, resource);
 
+  start_counters(time, resource);
+  bench<__uset2<false>>(
+	"std::unordered_set2 without hash code cached ", foos);
+  bench<__uset2<true>>(
+	"std::unordered_set2 with hash code cached ", foos);
+  bench<__umset2<false>>(
+	"std::unordered_multiset2 without hash code cached ", foos);
+  bench<__umset2<true>>(
+	"std::unordered_multiset2 with hash code cached ", foos);
+
+  stop_counters(time, resource);
+  report_performance(__FILE__, "std2 benches", time, resource);
+
   bench<std::unordered_set<Foo, HashFunction>>(
 	"std::unordered_set default cache ", foos);
   bench<std::unordered_multiset<Foo, HashFunction>>(
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
index 9846104..4b29fde 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
@@ -177,6 +177,16 @@ template<bool cache>
 					cache>;
 
 template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<int, int, std::allocator<int>,
+			      std::__detail::_Identity,
+			      std::equal_to<int>, std::hash<int>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
   using __str_uset = 
 	      std::__uset_hashtable<std::string, std::hash<std::string>,
 				    std::equal_to<std::string>,
@@ -190,6 +200,16 @@ template<bool cache>
 					std::allocator<std::string>,
 					cache>;
 
+template<bool cache>
+  using __str_uset2 =
+	      std::_Hashtable<std::string, std::string, std::allocator<std::string>,
+			      std::__detail::_Identity,
+			      std::equal_to<std::string>, std::hash<std::string>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
 int main()
 {
   bench<__tr1_uset<false>>(
@@ -202,6 +222,10 @@ int main()
 	"std::unordered_set<int> with hash code cached");
   bench<std::unordered_set<int>>(
 	"std::unordered_set<int> default cache");
+  bench<__uset2<false>>(
+	"std::unordered_set2<int> without hash code cached");
+  bench<__uset2<true>>(
+	"std::unordered_set2<int> with hash code cached");
   bench_str<__tr1_str_uset<false>>(
 	"std::tr1::unordered_set<string> without hash code cached");
   bench_str<__tr1_str_uset<true>>(
@@ -210,7 +234,11 @@ int main()
 	"std::unordered_set<string> without hash code cached");
   bench_str<__str_uset<true>>(
 	"std::unordered_set<string> with hash code cached");
-    bench_str<std::unordered_set<std::string>>(
+  bench_str<std::unordered_set<std::string>>(
 	"std::unordered_set<string> default cache");
+  bench_str<__str_uset2<false>>(
+	"std::unordered_set2<string> without hash code cached");
+  bench_str<__str_uset2<true>>(
+	"std::unordered_set2<string> with hash code cached");
   return 0;
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-10 20:11 New power of 2 hash policy François Dumont
@ 2015-09-11 13:18 ` Michael Matz
  2015-09-11 13:19   ` Jonathan Wakely
  0 siblings, 1 reply; 9+ messages in thread
From: Michael Matz @ 2015-09-11 13:18 UTC (permalink / raw)
  To: François Dumont; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1042 bytes --]

Hi,

On Thu, 10 Sep 2015, François Dumont wrote:

>     Here is a patch to offer an alternative hash policy. This one is 
> using power of 2 number of buckets allowing a faster modulo operation. 
> This is obvious when running the performance test that I have adapted to 
> use this alternative policy. Something between current implementation 
> and the tr1 one, the old std one.
> 
>     Of course with this hash policy the lower bits of the hash code are 
> more important. For pointers it would require to change the std::hash 
> implementation to remove the lower 0 bits like in the patch I proposed 
> some weeks ago.
> 
>     What do you think ?

No comment on if it should be included (except that it seems useful to 
me), but one observation of the patch:

> +    1ul << 31,
> +#if __SIZEOF_LONG__ != 8
> +    1ul << 32
> +#else

This is wrong, 1ul<<32 is zero on a 32bit machine, and is also the 33rd 
entry in that table, when you want only 32.  Like you also (correctly) 
stop with 1ul<<63 for a 64bit machine.


Ciao,
Michael.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-11 13:18 ` Michael Matz
@ 2015-09-11 13:19   ` Jonathan Wakely
  2015-09-11 13:28     ` Jonathan Wakely
  0 siblings, 1 reply; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-11 13:19 UTC (permalink / raw)
  To: Michael Matz; +Cc: François Dumont, libstdc++, gcc-patches

On 11/09/15 15:11 +0200, Michael Matz wrote:
>Hi,
>
>On Thu, 10 Sep 2015, François Dumont wrote:
>
>>     Here is a patch to offer an alternative hash policy. This one is
>> using power of 2 number of buckets allowing a faster modulo operation.
>> This is obvious when running the performance test that I have adapted to
>> use this alternative policy. Something between current implementation
>> and the tr1 one, the old std one.
>>
>>     Of course with this hash policy the lower bits of the hash code are
>> more important. For pointers it would require to change the std::hash
>> implementation to remove the lower 0 bits like in the patch I proposed
>> some weeks ago.
>>
>>     What do you think ?
>
>No comment on if it should be included (except that it seems useful to
>me), but one observation of the patch:
>
>> +    1ul << 31,
>> +#if __SIZEOF_LONG__ != 8
>> +    1ul << 32
>> +#else
>
>This is wrong, 1ul<<32 is zero on a 32bit machine, and is also the 33rd
>entry in that table, when you want only 32.  Like you also (correctly)
>stop with 1ul<<63 for a 64bit machine.

I'd prefer to see that table disappear completely, replaced by a
constexpr function. We need a static table of prime numbers because
they can't be computed instantly, but we don't need to store powers of
two in the library.

I agree the extension is useful, and would like to see it included,
but I wonder if we can do it without adding any new symbols to the
shared library. We certainly don't need the table, and the few other
functions added to the DSO could probably be defined inline in
headers.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-11 13:19   ` Jonathan Wakely
@ 2015-09-11 13:28     ` Jonathan Wakely
  2015-09-13 20:28       ` François Dumont
  2015-09-24 20:58       ` François Dumont
  0 siblings, 2 replies; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-11 13:28 UTC (permalink / raw)
  To: Michael Matz; +Cc: François Dumont, libstdc++, gcc-patches

On 11/09/15 14:18 +0100, Jonathan Wakely wrote:
>On 11/09/15 15:11 +0200, Michael Matz wrote:
>>Hi,
>>
>>On Thu, 10 Sep 2015, François Dumont wrote:
>>
>>>    Here is a patch to offer an alternative hash policy. This one is
>>>using power of 2 number of buckets allowing a faster modulo operation.
>>>This is obvious when running the performance test that I have adapted to
>>>use this alternative policy. Something between current implementation
>>>and the tr1 one, the old std one.
>>>
>>>    Of course with this hash policy the lower bits of the hash code are
>>>more important. For pointers it would require to change the std::hash
>>>implementation to remove the lower 0 bits like in the patch I proposed
>>>some weeks ago.
>>>
>>>    What do you think ?
>>
>>No comment on if it should be included (except that it seems useful to
>>me), but one observation of the patch:
>>
>>>+    1ul << 31,
>>>+#if __SIZEOF_LONG__ != 8
>>>+    1ul << 32
>>>+#else
>>
>>This is wrong, 1ul<<32 is zero on a 32bit machine, and is also the 33rd
>>entry in that table, when you want only 32.  Like you also (correctly)
>>stop with 1ul<<63 for a 64bit machine.
>
>I'd prefer to see that table disappear completely, replaced by a
>constexpr function. We need a static table of prime numbers because
>they can't be computed instantly, but we don't need to store powers of
>two in the library.
>
>I agree the extension is useful, and would like to see it included,
>but I wonder if we can do it without adding any new symbols to the
>shared library. We certainly don't need the table, and the few other
>functions added to the DSO could probably be defined inline in
>headers.


Also there several comments that talk about finding "the next prime"
which should talk about powers of two, and the smaller table for fast
lookup of the next "prime" may not be needed for powers of two. There
are fast tricks for finding the next power of two using bitwise
operations.

So I'm in favour of the change in general, but it needs a little bit
of reworking where the prime number code has been copy&pasted.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-11 13:28     ` Jonathan Wakely
@ 2015-09-13 20:28       ` François Dumont
  2015-09-24 20:58       ` François Dumont
  1 sibling, 0 replies; 9+ messages in thread
From: François Dumont @ 2015-09-13 20:28 UTC (permalink / raw)
  To: Jonathan Wakely, Michael Matz; +Cc: libstdc++, gcc-patches

On 11/09/2015 15:23, Jonathan Wakely wrote:
> On 11/09/15 14:18 +0100, Jonathan Wakely wrote:
>> On 11/09/15 15:11 +0200, Michael Matz wrote:
>>> Hi,
>>>
>>> On Thu, 10 Sep 2015, François Dumont wrote:
>>>
>>>>    Here is a patch to offer an alternative hash policy. This one is
>>>> using power of 2 number of buckets allowing a faster modulo operation.
>>>> This is obvious when running the performance test that I have
>>>> adapted to
>>>> use this alternative policy. Something between current implementation
>>>> and the tr1 one, the old std one.
>>>>
>>>>    Of course with this hash policy the lower bits of the hash code are
>>>> more important. For pointers it would require to change the std::hash
>>>> implementation to remove the lower 0 bits like in the patch I proposed
>>>> some weeks ago.
>>>>
>>>>    What do you think ?
>>>
>>> No comment on if it should be included (except that it seems useful to
>>> me), but one observation of the patch:
>>>
>>>> +    1ul << 31,
>>>> +#if __SIZEOF_LONG__ != 8
>>>> +    1ul << 32
>>>> +#else
>>>
>>> This is wrong, 1ul<<32 is zero on a 32bit machine, and is also the 33rd
>>> entry in that table, when you want only 32.  Like you also (correctly)
>>> stop with 1ul<<63 for a 64bit machine.
>>
>> I'd prefer to see that table disappear completely, replaced by a
>> constexpr function. We need a static table of prime numbers because
>> they can't be computed instantly, but we don't need to store powers of
>> two in the library.
>>
>> I agree the extension is useful, and would like to see it included,
>> but I wonder if we can do it without adding any new symbols to the
>> shared library. We certainly don't need the table, and the few other
>> functions added to the DSO could probably be defined inline in
>> headers.
>
>
> Also there several comments that talk about finding "the next prime"
> which should talk about powers of two, and the smaller table for fast
> lookup of the next "prime" may not be needed for powers of two. There
> are fast tricks for finding the next power of two using bitwise
> operations.
>
> So I'm in favour of the change in general, but it needs a little bit
> of reworking where the prime number code has been copy&pasted.
>
Ok, thanks for the feedback. We can indeed implement it in a simpler
way, should be fine.

François

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-11 13:28     ` Jonathan Wakely
  2015-09-13 20:28       ` François Dumont
@ 2015-09-24 20:58       ` François Dumont
  2015-09-25 13:58         ` Jonathan Wakely
  1 sibling, 1 reply; 9+ messages in thread
From: François Dumont @ 2015-09-24 20:58 UTC (permalink / raw)
  To: Jonathan Wakely, Michael Matz; +Cc: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2699 bytes --]

On 11/09/2015 15:23, Jonathan Wakely wrote:
> On 11/09/15 14:18 +0100, Jonathan Wakely wrote:
>> On 11/09/15 15:11 +0200, Michael Matz wrote:
>>> Hi,
>>>
>>> On Thu, 10 Sep 2015, François Dumont wrote:
>>>
>>>>    Here is a patch to offer an alternative hash policy. This one is
>>>> using power of 2 number of buckets allowing a faster modulo operation.
>>>> This is obvious when running the performance test that I have
>>>> adapted to
>>>> use this alternative policy. Something between current implementation
>>>> and the tr1 one, the old std one.
>>>>
>>>>    Of course with this hash policy the lower bits of the hash code are
>>>> more important. For pointers it would require to change the std::hash
>>>> implementation to remove the lower 0 bits like in the patch I proposed
>>>> some weeks ago.
>>>>
>>>>    What do you think ?
>>>
>>> No comment on if it should be included (except that it seems useful to
>>> me), but one observation of the patch:
>>>
>>>> +    1ul << 31,
>>>> +#if __SIZEOF_LONG__ != 8
>>>> +    1ul << 32
>>>> +#else
>>>
>>> This is wrong, 1ul<<32 is zero on a 32bit machine, and is also the 33rd
>>> entry in that table, when you want only 32.  Like you also (correctly)
>>> stop with 1ul<<63 for a 64bit machine.
>>
>> I'd prefer to see that table disappear completely, replaced by a
>> constexpr function. We need a static table of prime numbers because
>> they can't be computed instantly, but we don't need to store powers of
>> two in the library.
>>
>> I agree the extension is useful, and would like to see it included,
>> but I wonder if we can do it without adding any new symbols to the
>> shared library. We certainly don't need the table, and the few other
>> functions added to the DSO could probably be defined inline in
>> headers.
>
>
> Also there several comments that talk about finding "the next prime"
> which should talk about powers of two, and the smaller table for fast
> lookup of the next "prime" may not be needed for powers of two. There
> are fast tricks for finding the next power of two using bitwise
> operations.
>
> So I'm in favour of the change in general, but it needs a little bit
> of reworking where the prime number code has been copy&pasted.
>
Hi

    Here is the new patch then.

    Working on it I realised that despite the comment on _M_next_bkt
saying "no smaller than n" the method can return a value smaller for big
n values. This is not likely to happen but I prefer to take care of it.
I just make sure we won't try to rehash again even if the drawback is
that we won't respect max_load_factor anymore at those levels. But as I
said we will surely have a memory issue before that.

    Ok to commit ?

François




[-- Attachment #2: hashtable.patch --]
[-- Type: text/x-patch, Size: 17736 bytes --]

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index a9ad7dd..4e1bc29 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// smallest prime that keeps the load factor small enough.
   struct _Prime_rehash_policy
   {
+    using __has_load_factor = std::true_type;
+
     _Prime_rehash_policy(float __z = 1.0) noexcept
     : _M_max_load_factor(__z), _M_next_resize(0) { }
 
@@ -501,6 +503,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     mutable std::size_t	_M_next_resize;
   };
 
+  /// Range hashing function considering that second args is a power of 2.
+  struct _Mask_range_hashing
+  {
+    typedef std::size_t first_argument_type;
+    typedef std::size_t second_argument_type;
+    typedef std::size_t result_type;
+
+    result_type
+    operator()(first_argument_type __num,
+	       second_argument_type __den) const noexcept
+    { return __num & (__den - 1); }
+  };
+
+
+  /// Helper type to compute next power of 2.
+  template<std::size_t _N>
+    struct _NextPower2
+    {
+      static std::size_t
+      _Get(std::size_t __n)
+      {
+	std::size_t __next = _NextPower2<(_N >> 1)>::_Get(__n);
+	return __next |= __next >> _N;
+      }
+    };
+
+  template<>
+    struct _NextPower2<1>
+    {
+      static std::size_t
+      _Get(std::size_t __n)
+      { return __n |= __n >> 1; }
+    };
+
+  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
+  /// operations.
+  struct _Power2_rehash_policy
+  {
+    using __has_load_factor = std::true_type;
+
+    _Power2_rehash_policy(float __z = 1.0) noexcept
+    : _M_max_load_factor(__z), _M_next_resize(0) { }
+
+    float
+    max_load_factor() const noexcept
+    { return _M_max_load_factor; }
+
+    // Return a bucket size no smaller than n (as long as n is not above the
+    // highest power of 2).
+    std::size_t
+    _M_next_bkt(std::size_t __n) const
+    {
+      constexpr auto __max_bkt
+	= (std::size_t(1) << (sizeof(std::size_t) * 8 - 1));
+
+      std::size_t __res
+	= _NextPower2<((sizeof(std::size_t) * 8) >> 1)>::_Get(--__n) + 1;
+
+      if (__res == 0)
+	__res = __max_bkt;
+
+      if (__res == __max_bkt)
+	// Set next resize to the max value so that we never try to rehash again
+	// as we already reach the biggest possible bucket number.
+	// Note that it might result in max_load_factor not being respected.
+	_M_next_resize = std::size_t(0) - 1;
+      else
+	_M_next_resize
+	  = __builtin_floor(__res * (long double)_M_max_load_factor);
+
+      return __res;
+    }
+
+    // Return a bucket count appropriate for n elements
+    std::size_t
+    _M_bkt_for_elements(std::size_t __n) const
+    { return __builtin_ceil(__n / (long double)_M_max_load_factor); }
+
+    // __n_bkt is current bucket count, __n_elt is current element count,
+    // and __n_ins is number of elements to be inserted.  Do we need to
+    // increase bucket count?  If so, return make_pair(true, n), where n
+    // is the new bucket count.  If not, return make_pair(false, 0).
+    std::pair<bool, std::size_t>
+    _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
+		   std::size_t __n_ins) const
+    {
+      if (__n_elt + __n_ins >= _M_next_resize)
+	{
+	  long double __min_bkts = (__n_elt + __n_ins)
+					/ (long double)_M_max_load_factor;
+	  if (__min_bkts >= __n_bkt)
+	    return std::make_pair(true,
+	      _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
+						__n_bkt * _S_growth_factor)));
+
+	  _M_next_resize
+	    = __builtin_floor(__n_bkt * (long double)_M_max_load_factor);
+	  return std::make_pair(false, 0);
+	}
+      else
+	return std::make_pair(false, 0);
+    }
+
+    typedef std::size_t _State;
+
+    _State
+    _M_state() const
+    { return _M_next_resize; }
+
+    void
+    _M_reset() noexcept
+    { _M_next_resize = 0; }
+
+    void
+    _M_reset(_State __state)
+    { _M_next_resize = __state; }
+
+    static const std::size_t _S_growth_factor = 2;
+
+    float		_M_max_load_factor;
+    mutable std::size_t	_M_next_resize;
+  };
+
   // Base classes for std::_Hashtable.  We define these base classes
   // because in some cases we want to do different things depending on
   // the value of a policy class.  In some cases the policy class
@@ -775,8 +900,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits,
-	   bool _Constant_iterators = _Traits::__constant_iterators::value,
-	   bool _Unique_keys = _Traits::__unique_keys::value>
+	   bool _Constant_iterators = _Traits::__constant_iterators::value>
     struct _Insert;
 
   /// Specialization.
@@ -785,65 +909,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, true>
+		   _RehashPolicy, _Traits, true>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
       using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
 					_Equal, _H1, _H2, _Hash,
 					_RehashPolicy, _Traits>;
-      using value_type = typename __base_type::value_type;
-      using iterator = typename __base_type::iterator;
-      using const_iterator =  typename __base_type::const_iterator;
 
-      using __unique_keys = typename __base_type::__unique_keys;
-      using __hashtable = typename __base_type::__hashtable;
-      using __node_gen_type = typename __base_type::__node_gen_type;
-
-      using __base_type::insert;
-
-      std::pair<iterator, bool>
-      insert(value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(std::move(__v), __node_gen, __unique_keys());
-      }
-
-      iterator
-      insert(const_iterator __hint, value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(__hint, std::move(__v), __node_gen,
-			     __unique_keys());
-      }
-    };
+      using __hashtable_base = _Hashtable_base<_Key, _Value, _ExtractKey,
+					       _Equal, _H1, _H2, _Hash,
+					       _Traits>;
 
-  /// Specialization.
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
-    struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, false>
-    : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
-    {
-      using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
-					_Equal, _H1, _H2, _Hash,
-					_RehashPolicy, _Traits>;
       using value_type = typename __base_type::value_type;
       using iterator = typename __base_type::iterator;
       using const_iterator =  typename __base_type::const_iterator;
 
       using __unique_keys = typename __base_type::__unique_keys;
+      using __ireturn_type = typename __hashtable_base::__ireturn_type;
       using __hashtable = typename __base_type::__hashtable;
       using __node_gen_type = typename __base_type::__node_gen_type;
 
       using __base_type::insert;
 
-      iterator
+      __ireturn_type
       insert(value_type&& __v)
       {
 	__hashtable& __h = this->_M_conjure_hashtable();
@@ -865,9 +954,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits, bool _Unique_keys>
+	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, false, _Unique_keys>
+		   _RehashPolicy, _Traits, false>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
@@ -911,28 +1000,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
    };
 
+  template<typename _Policy>
+    using __has_load_factor = typename _Policy::__has_load_factor;
+
   /**
    *  Primary class template  _Rehash_base.
    *
    *  Give hashtable the max_load_factor functions and reserve iff the
-   *  rehash policy is _Prime_rehash_policy.
+   *  rehash policy supports it.
   */
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
+	   typename _RehashPolicy, typename _Traits,
+	   typename =
+	     __detected_or_t<std::false_type, __has_load_factor, _RehashPolicy>>
     struct _Rehash_base;
 
-  /// Specialization.
+  /// Specialization when rehash policy doesn't provide load factor management.
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash, typename _Traits>
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
+    struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+		      _H1, _H2, _Hash, _RehashPolicy, _Traits,
+		      std::false_type>
+    {
+    };
+
+  /// Specialization when rehash policy provide load factor management.
+  template<typename _Key, typename _Value, typename _Alloc,
+	   typename _ExtractKey, typename _Equal,
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
     struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			_H1, _H2, _Hash, _Prime_rehash_policy, _Traits>
+			_H1, _H2, _Hash, _RehashPolicy, _Traits,
+			std::true_type>
     {
       using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey,
 				     _Equal, _H1, _H2, _Hash,
-				     _Prime_rehash_policy, _Traits>;
+				     _RehashPolicy, _Traits>;
 
       float
       max_load_factor() const noexcept
@@ -945,7 +1052,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       max_load_factor(float __z)
       {
 	__hashtable* __this = static_cast<__hashtable*>(this);
-	__this->__rehash_policy(_Prime_rehash_policy(__z));
+	__this->__rehash_policy(_RehashPolicy(__z));
       }
 
       void
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index 69f999f..dd2afe3 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -60,8 +60,16 @@ namespace __detail
       = sizeof(__prime_list) / sizeof(unsigned long) - 1;
     const unsigned long* __next_bkt =
       std::lower_bound(__prime_list + 5, __prime_list + __n_primes, __n);
-    _M_next_resize =
-      __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
+    if (__next_bkt == __prime_list + __n_primes)
+      // Set next resize to the max value so that we never try to rehash again
+      // as we already reach the biggest possible bucket number.
+      // Note that it might result in max_load_factor not being respected.
+      _M_next_resize = std::size_t(0) - 1;
+    else
+      _M_next_resize =
+	__builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
     return *__next_bkt;
   }
 
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
new file mode 100644
index 0000000..502794b
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
@@ -0,0 +1,42 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+//
+// { dg-options "-std=gnu++11" }
+
+#include <unordered_set>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::__detail::_Power2_rehash_policy policy;
+  VERIFY( policy._M_next_bkt(1) == 1 );
+  VERIFY( policy._M_next_bkt(2) == 2 );
+  VERIFY( policy._M_next_bkt(3) == 4 );
+  VERIFY( policy._M_next_bkt(5) == 8 );
+  VERIFY( policy._M_next_bkt(33) == 64 );
+  VERIFY( policy._M_next_bkt((std::size_t(1) << (sizeof(std::size_t) * 8 - 2)) + 1)
+	  == (std::size_t(1) << (sizeof(std::size_t) * 8 - 1)) );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
index ef956a0..a07d552 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
@@ -127,7 +127,27 @@ template<bool cache>
   using __umset = std::__umset_hashtable<Foo, HashFunction,
 					 std::equal_to<Foo>,
 					 std::allocator<Foo>,
-					 std::__uset_traits<cache>>;
+					 std::__umset_traits<cache>>;
+
+template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
+  using __umset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__umset_traits<cache>>;
 
 int main()
 {
@@ -181,6 +201,19 @@ int main()
   stop_counters(time, resource);
   report_performance(__FILE__, "std benches", time, resource);
 
+  start_counters(time, resource);
+  bench<__uset2<false>>(
+	"std::unordered_set2 without hash code cached ", foos);
+  bench<__uset2<true>>(
+	"std::unordered_set2 with hash code cached ", foos);
+  bench<__umset2<false>>(
+	"std::unordered_multiset2 without hash code cached ", foos);
+  bench<__umset2<true>>(
+	"std::unordered_multiset2 with hash code cached ", foos);
+
+  stop_counters(time, resource);
+  report_performance(__FILE__, "std2 benches", time, resource);
+
   bench<std::unordered_set<Foo, HashFunction>>(
 	"std::unordered_set default cache ", foos);
   bench<std::unordered_multiset<Foo, HashFunction>>(
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
index 9846104..4b29fde 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
@@ -177,6 +177,16 @@ template<bool cache>
 					cache>;
 
 template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<int, int, std::allocator<int>,
+			      std::__detail::_Identity,
+			      std::equal_to<int>, std::hash<int>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
   using __str_uset = 
 	      std::__uset_hashtable<std::string, std::hash<std::string>,
 				    std::equal_to<std::string>,
@@ -190,6 +200,16 @@ template<bool cache>
 					std::allocator<std::string>,
 					cache>;
 
+template<bool cache>
+  using __str_uset2 =
+	      std::_Hashtable<std::string, std::string, std::allocator<std::string>,
+			      std::__detail::_Identity,
+			      std::equal_to<std::string>, std::hash<std::string>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
 int main()
 {
   bench<__tr1_uset<false>>(
@@ -202,6 +222,10 @@ int main()
 	"std::unordered_set<int> with hash code cached");
   bench<std::unordered_set<int>>(
 	"std::unordered_set<int> default cache");
+  bench<__uset2<false>>(
+	"std::unordered_set2<int> without hash code cached");
+  bench<__uset2<true>>(
+	"std::unordered_set2<int> with hash code cached");
   bench_str<__tr1_str_uset<false>>(
 	"std::tr1::unordered_set<string> without hash code cached");
   bench_str<__tr1_str_uset<true>>(
@@ -210,7 +234,11 @@ int main()
 	"std::unordered_set<string> without hash code cached");
   bench_str<__str_uset<true>>(
 	"std::unordered_set<string> with hash code cached");
-    bench_str<std::unordered_set<std::string>>(
+  bench_str<std::unordered_set<std::string>>(
 	"std::unordered_set<string> default cache");
+  bench_str<__str_uset2<false>>(
+	"std::unordered_set2<string> without hash code cached");
+  bench_str<__str_uset2<true>>(
+	"std::unordered_set2<string> with hash code cached");
   return 0;
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-24 20:58       ` François Dumont
@ 2015-09-25 13:58         ` Jonathan Wakely
  2015-09-28 19:39           ` François Dumont
  0 siblings, 1 reply; 9+ messages in thread
From: Jonathan Wakely @ 2015-09-25 13:58 UTC (permalink / raw)
  To: François Dumont; +Cc: Michael Matz, libstdc++, gcc-patches

On 24/09/15 22:08 +0200, François Dumont wrote:
>    Working on it I realised that despite the comment on _M_next_bkt
>saying "no smaller than n" the method can return a value smaller for big
>n values. This is not likely to happen but I prefer to take care of it.
>I just make sure we won't try to rehash again even if the drawback is
>that we won't respect max_load_factor anymore at those levels. But as I
>said we will surely have a memory issue before that.
>
>    Ok to commit ?
>
>François
>
>
>

>diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
>index a9ad7dd..4e1bc29 100644
>--- a/libstdc++-v3/include/bits/hashtable_policy.h
>+++ b/libstdc++-v3/include/bits/hashtable_policy.h
>@@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>   /// smallest prime that keeps the load factor small enough.
>   struct _Prime_rehash_policy
>   {
>+    using __has_load_factor = std::true_type;
>+
>     _Prime_rehash_policy(float __z = 1.0) noexcept
>     : _M_max_load_factor(__z), _M_next_resize(0) { }
> 
>@@ -501,6 +503,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>     mutable std::size_t	_M_next_resize;
>   };
> 
>+  /// Range hashing function considering that second args is a power of 2.

Does this mean "assuming" not "considering"?

>+  struct _Mask_range_hashing
>+  {
>+    typedef std::size_t first_argument_type;
>+    typedef std::size_t second_argument_type;
>+    typedef std::size_t result_type;
>+
>+    result_type
>+    operator()(first_argument_type __num,
>+	       second_argument_type __den) const noexcept
>+    { return __num & (__den - 1); }
>+  };
>+
>+
>+  /// Helper type to compute next power of 2.
>+  template<std::size_t _N>
>+    struct _NextPower2
>+    {
>+      static std::size_t
>+      _Get(std::size_t __n)
>+      {
>+	std::size_t __next = _NextPower2<(_N >> 1)>::_Get(__n);
>+	return __next |= __next >> _N;
>+      }
>+    };
>+
>+  template<>
>+    struct _NextPower2<1>
>+    {
>+      static std::size_t
>+      _Get(std::size_t __n)
>+      { return __n |= __n >> 1; }
>+    };

This doesn't seem to return the next power of 2, it returns one less.

_NextPower2<32>::_Get(2) returns 3, but 2 is already a power of 2.
_NextPower2<32>::_Get(3) returns 3, but the next power of 2 is 4.


I don't think this needs to be a recursive template, it can simply be
a function, can't it?


>+  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
>+  /// operations.
>+  struct _Power2_rehash_policy
>+  {
>+    using __has_load_factor = std::true_type;
>+
>+    _Power2_rehash_policy(float __z = 1.0) noexcept
>+    : _M_max_load_factor(__z), _M_next_resize(0) { }
>+
>+    float
>+    max_load_factor() const noexcept
>+    { return _M_max_load_factor; }
>+
>+    // Return a bucket size no smaller than n (as long as n is not above the
>+    // highest power of 2).

This says "no smaller than n" but it actually seems to guarantee
"greater than n" because _NextPower2<>::_Get(n)+1 is 2n when n is a
power of two.

>+    std::size_t
>+    _M_next_bkt(std::size_t __n) const
>+    {
>+      constexpr auto __max_bkt
>+	= (std::size_t(1) << (sizeof(std::size_t) * 8 - 1));
>+
>+      std::size_t __res
>+	= _NextPower2<((sizeof(std::size_t) * 8) >> 1)>::_Get(--__n) + 1;

You wouldn't need to add one to the result if the template actually
returned a power of two!

>+      if (__res == 0)
>+	__res = __max_bkt;
>+
>+      if (__res == __max_bkt)
>+	// Set next resize to the max value so that we never try to rehash again
>+	// as we already reach the biggest possible bucket number.
>+	// Note that it might result in max_load_factor not being respected.
>+	_M_next_resize = std::size_t(0) - 1;
>+      else
>+	_M_next_resize
>+	  = __builtin_floor(__res * (long double)_M_max_load_factor);
>+
>+      return __res;
>+    }

What are the requirements for this function, "no smaller than n" or
"greater than n"?

If "no smaller than n" is correct then the algorithm you want is
"round up to nearest power of 2", which you can find here (I wrote
this earlier this year for some reason I can't remember now):

https://gitlab.com/redistd/redistd/blob/master/include/redi/bits.h

The non-recursive version is only a valid constexpr function in C++14,
but since you don't need a constexpr function you could just that,
extended to handle 64-bit:

  std::size_t
  clp2(std::size_t n)
  {
    std::uint_least64_t x = n;
    // Algorithm from Hacker's Delight, Figure 3-3.
    x = x - 1;
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >>16);
    x = x | (x >>32);
    return x + 1;
  }

We could avoid the last shift when sizeof(size_t) == 32, I don't know
if the optimisers will take care of that anyway.


The rest of the patch looks fine.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-25 13:58         ` Jonathan Wakely
@ 2015-09-28 19:39           ` François Dumont
  2015-10-19 20:23             ` François Dumont
  0 siblings, 1 reply; 9+ messages in thread
From: François Dumont @ 2015-09-28 19:39 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: Michael Matz, libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5134 bytes --]

On 25/09/2015 15:28, Jonathan Wakely wrote:
> @@ -501,6 +503,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>     mutable std::size_t    _M_next_resize;
>>   };
>>
>> +  /// Range hashing function considering that second args is a power
>> of 2.
>
> Does this mean "assuming" not "considering"?

I assume yes.

>
>> +  struct _Mask_range_hashing
>> +  {
>> +    typedef std::size_t first_argument_type;
>> +    typedef std::size_t second_argument_type;
>> +    typedef std::size_t result_type;
>> +
>> +    result_type
>> +    operator()(first_argument_type __num,
>> +           second_argument_type __den) const noexcept
>> +    { return __num & (__den - 1); }
>> +  };
>> +
>> +
>> +  /// Helper type to compute next power of 2.
>> +  template<std::size_t _N>
>> +    struct _NextPower2
>> +    {
>> +      static std::size_t
>> +      _Get(std::size_t __n)
>> +      {
>> +    std::size_t __next = _NextPower2<(_N >> 1)>::_Get(__n);
>> +    return __next |= __next >> _N;
>> +      }
>> +    };
>> +
>> +  template<>
>> +    struct _NextPower2<1>
>> +    {
>> +      static std::size_t
>> +      _Get(std::size_t __n)
>> +      { return __n |= __n >> 1; }
>> +    };
>
> This doesn't seem to return the next power of 2, it returns one less.
>
> _NextPower2<32>::_Get(2) returns 3, but 2 is already a power of 2.
> _NextPower2<32>::_Get(3) returns 3, but the next power of 2 is 4.


Yes, name is bad, that is just part of the algo you copy/paste below. I
review implementation to have _NextPower2 do all the algo.

>
>
> I don't think this needs to be a recursive template, it can simply be
> a function, can't it?

I wanted code to adapt to any sizeof(std::size_t) without relying on
some preprocessor checks. As you pointed out additional >> 32 on 32 bits
or >> 64 on 64 bits wouldn't hurt but the recursive template just make
sure that we don't do useless operations.

>
>
>> +  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
>> +  /// operations.
>> +  struct _Power2_rehash_policy
>> +  {
>> +    using __has_load_factor = std::true_type;
>> +
>> +    _Power2_rehash_policy(float __z = 1.0) noexcept
>> +    : _M_max_load_factor(__z), _M_next_resize(0) { }
>> +
>> +    float
>> +    max_load_factor() const noexcept
>> +    { return _M_max_load_factor; }
>> +
>> +    // Return a bucket size no smaller than n (as long as n is not
>> above the
>> +    // highest power of 2).
>
> This says "no smaller than n" but it actually seems to guarantee
> "greater than n" because _NextPower2<>::_Get(n)+1 is 2n when n is a
> power of two.

yes but this function is calling _NextPower2<>::_Get(n - 1) + 1, there
is a minus one which make this comment valid as shown by newly
introduced test.

>
>> +    std::size_t
>> +    _M_next_bkt(std::size_t __n) const
>> +    {
>> +      constexpr auto __max_bkt
>> +    = (std::size_t(1) << (sizeof(std::size_t) * 8 - 1));
>> +
>> +      std::size_t __res
>> +    = _NextPower2<((sizeof(std::size_t) * 8) >> 1)>::_Get(--__n) + 1;
>
> You wouldn't need to add one to the result if the template actually
> returned a power of two!
>
>> +      if (__res == 0)
>> +    __res = __max_bkt;
>> +
>> +      if (__res == __max_bkt)
>> +    // Set next resize to the max value so that we never try to
>> rehash again
>> +    // as we already reach the biggest possible bucket number.
>> +    // Note that it might result in max_load_factor not being
>> respected.
>> +    _M_next_resize = std::size_t(0) - 1;
>> +      else
>> +    _M_next_resize
>> +      = __builtin_floor(__res * (long double)_M_max_load_factor);
>> +
>> +      return __res;
>> +    }
>
> What are the requirements for this function, "no smaller than n" or
> "greater than n"?

'No smaller than n' like stated in the comment. However for big n it is
not possible, even in the prime number based implementation. So I played
with _M_next_resize to make sure that _M_next_bkt won't be called again
as soon as the max bucket number has been reach.


>
> If "no smaller than n" is correct then the algorithm you want is
> "round up to nearest power of 2", which you can find here (I wrote
> this earlier this year for some reason I can't remember now):
>
> https://gitlab.com/redistd/redistd/blob/master/include/redi/bits.h
>
> The non-recursive version is only a valid constexpr function in C++14,
> but since you don't need a constexpr function you could just that,
> extended to handle 64-bit:
>
>  std::size_t
>  clp2(std::size_t n)
>  {
>    std::uint_least64_t x = n;
>    // Algorithm from Hacker's Delight, Figure 3-3.
>    x = x - 1;
>    x = x | (x >> 1);
>    x = x | (x >> 2);
>    x = x | (x >> 4);
>    x = x | (x >> 8);
>    x = x | (x >>16);
>    x = x | (x >>32);
>    return x + 1;
>  }
>
> We could avoid the last shift when sizeof(size_t) == 32, I don't know
> if the optimisers will take care of that anyway.

This is indeed the algo I found by myself and that I adapted to work
with any sizeof(size_t).

Do you prefer the new version or do you want to stick a more explicit
version like the one you propose above. In this case is the last 32 bits
shift enough ? No 128 bits platform yet ?

François


[-- Attachment #2: hashtable.patch --]
[-- Type: text/x-patch, Size: 17855 bytes --]

diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index a9ad7dd..bf8b644 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -457,6 +457,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   /// smallest prime that keeps the load factor small enough.
   struct _Prime_rehash_policy
   {
+    using __has_load_factor = std::true_type;
+
     _Prime_rehash_policy(float __z = 1.0) noexcept
     : _M_max_load_factor(__z), _M_next_resize(0) { }
 
@@ -501,6 +503,136 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     mutable std::size_t	_M_next_resize;
   };
 
+  /// Range hashing function assuming that second args is a power of 2.
+  struct _Mask_range_hashing
+  {
+    typedef std::size_t first_argument_type;
+    typedef std::size_t second_argument_type;
+    typedef std::size_t result_type;
+
+    result_type
+    operator()(first_argument_type __num,
+	       second_argument_type __den) const noexcept
+    { return __num & (__den - 1); }
+  };
+
+
+  /// Helper type to compute next power of 2.
+  template<typename _SizeT,
+	   std::size_t _N = sizeof(_SizeT) << 2, bool _Increment = true>
+    struct _NextPower2
+    {
+      static _SizeT
+      _Get(_SizeT __n)
+      {
+	_SizeT __next = _NextPower2<_SizeT, (_N >> 1), false>::_Get(__n);
+	__next |= __next >> _N;
+	if (_Increment)
+	  ++__next;
+
+	return __next;
+      }
+    };
+
+  template<typename _SizeT>
+    struct _NextPower2<_SizeT, 1, false>
+    {
+      static _SizeT
+      _Get(_SizeT __n)
+      {
+	--__n;
+	return __n |= __n >> 1;
+      }
+    };
+
+  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
+  /// operations.
+  struct _Power2_rehash_policy
+  {
+    using __has_load_factor = std::true_type;
+
+    _Power2_rehash_policy(float __z = 1.0) noexcept
+    : _M_max_load_factor(__z), _M_next_resize(0) { }
+
+    float
+    max_load_factor() const noexcept
+    { return _M_max_load_factor; }
+
+    // Return a bucket size no smaller than n (as long as n is not above the
+    // highest power of 2).
+    std::size_t
+    _M_next_bkt(std::size_t __n) const
+    {
+      constexpr auto __max_bkt
+	= std::size_t(1) << ((sizeof(std::size_t) << 3) - 1);
+
+      std::size_t __res = _NextPower2<std::size_t>::_Get(__n);
+
+      if (__res == 0)
+	__res = __max_bkt;
+
+      if (__res == __max_bkt)
+	// Set next resize to the max value so that we never try to rehash again
+	// as we already reach the biggest possible bucket number.
+	// Note that it might result in max_load_factor not being respected.
+	_M_next_resize = std::size_t(0) - 1;
+      else
+	_M_next_resize
+	  = __builtin_floor(__res * (long double)_M_max_load_factor);
+
+      return __res;
+    }
+
+    // Return a bucket count appropriate for n elements
+    std::size_t
+    _M_bkt_for_elements(std::size_t __n) const
+    { return __builtin_ceil(__n / (long double)_M_max_load_factor); }
+
+    // __n_bkt is current bucket count, __n_elt is current element count,
+    // and __n_ins is number of elements to be inserted.  Do we need to
+    // increase bucket count?  If so, return make_pair(true, n), where n
+    // is the new bucket count.  If not, return make_pair(false, 0).
+    std::pair<bool, std::size_t>
+    _M_need_rehash(std::size_t __n_bkt, std::size_t __n_elt,
+		   std::size_t __n_ins) const
+    {
+      if (__n_elt + __n_ins >= _M_next_resize)
+	{
+	  long double __min_bkts = (__n_elt + __n_ins)
+					/ (long double)_M_max_load_factor;
+	  if (__min_bkts >= __n_bkt)
+	    return std::make_pair(true,
+	      _M_next_bkt(std::max<std::size_t>(__builtin_floor(__min_bkts) + 1,
+						__n_bkt * _S_growth_factor)));
+
+	  _M_next_resize
+	    = __builtin_floor(__n_bkt * (long double)_M_max_load_factor);
+	  return std::make_pair(false, 0);
+	}
+      else
+	return std::make_pair(false, 0);
+    }
+
+    typedef std::size_t _State;
+
+    _State
+    _M_state() const
+    { return _M_next_resize; }
+
+    void
+    _M_reset() noexcept
+    { _M_next_resize = 0; }
+
+    void
+    _M_reset(_State __state)
+    { _M_next_resize = __state; }
+
+    static const std::size_t _S_growth_factor = 2;
+
+    float		_M_max_load_factor;
+    mutable std::size_t	_M_next_resize;
+  };
+
   // Base classes for std::_Hashtable.  We define these base classes
   // because in some cases we want to do different things depending on
   // the value of a policy class.  In some cases the policy class
@@ -775,8 +907,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits,
-	   bool _Constant_iterators = _Traits::__constant_iterators::value,
-	   bool _Unique_keys = _Traits::__unique_keys::value>
+	   bool _Constant_iterators = _Traits::__constant_iterators::value>
     struct _Insert;
 
   /// Specialization.
@@ -785,65 +916,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	   typename _H1, typename _H2, typename _Hash,
 	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, true>
+		   _RehashPolicy, _Traits, true>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
       using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
 					_Equal, _H1, _H2, _Hash,
 					_RehashPolicy, _Traits>;
-      using value_type = typename __base_type::value_type;
-      using iterator = typename __base_type::iterator;
-      using const_iterator =  typename __base_type::const_iterator;
-
-      using __unique_keys = typename __base_type::__unique_keys;
-      using __hashtable = typename __base_type::__hashtable;
-      using __node_gen_type = typename __base_type::__node_gen_type;
-
-      using __base_type::insert;
 
-      std::pair<iterator, bool>
-      insert(value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(std::move(__v), __node_gen, __unique_keys());
-      }
-
-      iterator
-      insert(const_iterator __hint, value_type&& __v)
-      {
-	__hashtable& __h = this->_M_conjure_hashtable();
-	__node_gen_type __node_gen(__h);
-	return __h._M_insert(__hint, std::move(__v), __node_gen,
-			     __unique_keys());
-      }
-    };
+      using __hashtable_base = _Hashtable_base<_Key, _Value, _ExtractKey,
+					       _Equal, _H1, _H2, _Hash,
+					       _Traits>;
 
-  /// Specialization.
-  template<typename _Key, typename _Value, typename _Alloc,
-	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
-    struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, true, false>
-    : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
-    {
-      using __base_type = _Insert_base<_Key, _Value, _Alloc, _ExtractKey,
-					_Equal, _H1, _H2, _Hash,
-					_RehashPolicy, _Traits>;
       using value_type = typename __base_type::value_type;
       using iterator = typename __base_type::iterator;
       using const_iterator =  typename __base_type::const_iterator;
 
       using __unique_keys = typename __base_type::__unique_keys;
+      using __ireturn_type = typename __hashtable_base::__ireturn_type;
       using __hashtable = typename __base_type::__hashtable;
       using __node_gen_type = typename __base_type::__node_gen_type;
 
       using __base_type::insert;
 
-      iterator
+      __ireturn_type
       insert(value_type&& __v)
       {
 	__hashtable& __h = this->_M_conjure_hashtable();
@@ -865,9 +961,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits, bool _Unique_keys>
+	   typename _RehashPolicy, typename _Traits>
     struct _Insert<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
-		   _RehashPolicy, _Traits, false, _Unique_keys>
+		   _RehashPolicy, _Traits, false>
     : public _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 			   _H1, _H2, _Hash, _RehashPolicy, _Traits>
     {
@@ -911,28 +1007,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
    };
 
+  template<typename _Policy>
+    using __has_load_factor = typename _Policy::__has_load_factor;
+
   /**
    *  Primary class template  _Rehash_base.
    *
    *  Give hashtable the max_load_factor functions and reserve iff the
-   *  rehash policy is _Prime_rehash_policy.
+   *  rehash policy supports it.
   */
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
 	   typename _H1, typename _H2, typename _Hash,
-	   typename _RehashPolicy, typename _Traits>
+	   typename _RehashPolicy, typename _Traits,
+	   typename =
+	     __detected_or_t<std::false_type, __has_load_factor, _RehashPolicy>>
     struct _Rehash_base;
 
-  /// Specialization.
+  /// Specialization when rehash policy doesn't provide load factor management.
   template<typename _Key, typename _Value, typename _Alloc,
 	   typename _ExtractKey, typename _Equal,
-	   typename _H1, typename _H2, typename _Hash, typename _Traits>
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
     struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
-			_H1, _H2, _Hash, _Prime_rehash_policy, _Traits>
+		      _H1, _H2, _Hash, _RehashPolicy, _Traits,
+		      std::false_type>
+    {
+    };
+
+  /// Specialization when rehash policy provide load factor management.
+  template<typename _Key, typename _Value, typename _Alloc,
+	   typename _ExtractKey, typename _Equal,
+	   typename _H1, typename _H2, typename _Hash,
+	   typename _RehashPolicy, typename _Traits>
+    struct _Rehash_base<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+			_H1, _H2, _Hash, _RehashPolicy, _Traits,
+			std::true_type>
     {
       using __hashtable = _Hashtable<_Key, _Value, _Alloc, _ExtractKey,
 				     _Equal, _H1, _H2, _Hash,
-				     _Prime_rehash_policy, _Traits>;
+				     _RehashPolicy, _Traits>;
 
       float
       max_load_factor() const noexcept
@@ -945,7 +1059,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       max_load_factor(float __z)
       {
 	__hashtable* __this = static_cast<__hashtable*>(this);
-	__this->__rehash_policy(_Prime_rehash_policy(__z));
+	__this->__rehash_policy(_RehashPolicy(__z));
       }
 
       void
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index 69f999f..dd2afe3 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -60,8 +60,16 @@ namespace __detail
       = sizeof(__prime_list) / sizeof(unsigned long) - 1;
     const unsigned long* __next_bkt =
       std::lower_bound(__prime_list + 5, __prime_list + __n_primes, __n);
-    _M_next_resize =
-      __builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
+    if (__next_bkt == __prime_list + __n_primes)
+      // Set next resize to the max value so that we never try to rehash again
+      // as we already reach the biggest possible bucket number.
+      // Note that it might result in max_load_factor not being respected.
+      _M_next_resize = std::size_t(0) - 1;
+    else
+      _M_next_resize =
+	__builtin_ceil(*__next_bkt * (long double)_M_max_load_factor);
+
     return *__next_bkt;
   }
 
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
new file mode 100644
index 0000000..502794b
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/hash_policy/power2_rehash.cc
@@ -0,0 +1,42 @@
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+//
+// { dg-options "-std=gnu++11" }
+
+#include <unordered_set>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::__detail::_Power2_rehash_policy policy;
+  VERIFY( policy._M_next_bkt(1) == 1 );
+  VERIFY( policy._M_next_bkt(2) == 2 );
+  VERIFY( policy._M_next_bkt(3) == 4 );
+  VERIFY( policy._M_next_bkt(5) == 8 );
+  VERIFY( policy._M_next_bkt(33) == 64 );
+  VERIFY( policy._M_next_bkt((std::size_t(1) << (sizeof(std::size_t) * 8 - 2)) + 1)
+	  == (std::size_t(1) << (sizeof(std::size_t) * 8 - 1)) );
+}
+
+int main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
index ef956a0..a07d552 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert/54075.cc
@@ -127,7 +127,27 @@ template<bool cache>
   using __umset = std::__umset_hashtable<Foo, HashFunction,
 					 std::equal_to<Foo>,
 					 std::allocator<Foo>,
-					 std::__uset_traits<cache>>;
+					 std::__umset_traits<cache>>;
+
+template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
+  using __umset2 =
+	      std::_Hashtable<Foo, Foo, std::allocator<Foo>,
+			      std::__detail::_Identity,
+			      std::equal_to<Foo>, HashFunction,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__umset_traits<cache>>;
 
 int main()
 {
@@ -181,6 +201,19 @@ int main()
   stop_counters(time, resource);
   report_performance(__FILE__, "std benches", time, resource);
 
+  start_counters(time, resource);
+  bench<__uset2<false>>(
+	"std::unordered_set2 without hash code cached ", foos);
+  bench<__uset2<true>>(
+	"std::unordered_set2 with hash code cached ", foos);
+  bench<__umset2<false>>(
+	"std::unordered_multiset2 without hash code cached ", foos);
+  bench<__umset2<true>>(
+	"std::unordered_multiset2 with hash code cached ", foos);
+
+  stop_counters(time, resource);
+  report_performance(__FILE__, "std2 benches", time, resource);
+
   bench<std::unordered_set<Foo, HashFunction>>(
 	"std::unordered_set default cache ", foos);
   bench<std::unordered_multiset<Foo, HashFunction>>(
diff --git a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
index 9846104..4b29fde 100644
--- a/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
+++ b/libstdc++-v3/testsuite/performance/23_containers/insert_erase/41975.cc
@@ -177,6 +177,16 @@ template<bool cache>
 					cache>;
 
 template<bool cache>
+  using __uset2 =
+	      std::_Hashtable<int, int, std::allocator<int>,
+			      std::__detail::_Identity,
+			      std::equal_to<int>, std::hash<int>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
+template<bool cache>
   using __str_uset = 
 	      std::__uset_hashtable<std::string, std::hash<std::string>,
 				    std::equal_to<std::string>,
@@ -190,6 +200,16 @@ template<bool cache>
 					std::allocator<std::string>,
 					cache>;
 
+template<bool cache>
+  using __str_uset2 =
+	      std::_Hashtable<std::string, std::string, std::allocator<std::string>,
+			      std::__detail::_Identity,
+			      std::equal_to<std::string>, std::hash<std::string>,
+			      std::__detail::_Mask_range_hashing,
+			      std::__detail::_Default_ranged_hash,
+			      std::__detail::_Power2_rehash_policy,
+			      std::__uset_traits<cache>>;
+
 int main()
 {
   bench<__tr1_uset<false>>(
@@ -202,6 +222,10 @@ int main()
 	"std::unordered_set<int> with hash code cached");
   bench<std::unordered_set<int>>(
 	"std::unordered_set<int> default cache");
+  bench<__uset2<false>>(
+	"std::unordered_set2<int> without hash code cached");
+  bench<__uset2<true>>(
+	"std::unordered_set2<int> with hash code cached");
   bench_str<__tr1_str_uset<false>>(
 	"std::tr1::unordered_set<string> without hash code cached");
   bench_str<__tr1_str_uset<true>>(
@@ -210,7 +234,11 @@ int main()
 	"std::unordered_set<string> without hash code cached");
   bench_str<__str_uset<true>>(
 	"std::unordered_set<string> with hash code cached");
-    bench_str<std::unordered_set<std::string>>(
+  bench_str<std::unordered_set<std::string>>(
 	"std::unordered_set<string> default cache");
+  bench_str<__str_uset2<false>>(
+	"std::unordered_set2<string> without hash code cached");
+  bench_str<__str_uset2<true>>(
+	"std::unordered_set2<string> with hash code cached");
   return 0;
 }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: New power of 2 hash policy
  2015-09-28 19:39           ` François Dumont
@ 2015-10-19 20:23             ` François Dumont
  0 siblings, 0 replies; 9+ messages in thread
From: François Dumont @ 2015-10-19 20:23 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: Michael Matz, libstdc++, gcc-patches

Is this one ok ?

François


On 28/09/2015 21:16, François Dumont wrote:
> On 25/09/2015 15:28, Jonathan Wakely wrote:
>> @@ -501,6 +503,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>>     mutable std::size_t    _M_next_resize;
>>>   };
>>>
>>> +  /// Range hashing function considering that second args is a power
>>> of 2.
>> Does this mean "assuming" not "considering"?
> I assume yes.
>
>>> +  struct _Mask_range_hashing
>>> +  {
>>> +    typedef std::size_t first_argument_type;
>>> +    typedef std::size_t second_argument_type;
>>> +    typedef std::size_t result_type;
>>> +
>>> +    result_type
>>> +    operator()(first_argument_type __num,
>>> +           second_argument_type __den) const noexcept
>>> +    { return __num & (__den - 1); }
>>> +  };
>>> +
>>> +
>>> +  /// Helper type to compute next power of 2.
>>> +  template<std::size_t _N>
>>> +    struct _NextPower2
>>> +    {
>>> +      static std::size_t
>>> +      _Get(std::size_t __n)
>>> +      {
>>> +    std::size_t __next = _NextPower2<(_N >> 1)>::_Get(__n);
>>> +    return __next |= __next >> _N;
>>> +      }
>>> +    };
>>> +
>>> +  template<>
>>> +    struct _NextPower2<1>
>>> +    {
>>> +      static std::size_t
>>> +      _Get(std::size_t __n)
>>> +      { return __n |= __n >> 1; }
>>> +    };
>> This doesn't seem to return the next power of 2, it returns one less.
>>
>> _NextPower2<32>::_Get(2) returns 3, but 2 is already a power of 2.
>> _NextPower2<32>::_Get(3) returns 3, but the next power of 2 is 4.
>
> Yes, name is bad, that is just part of the algo you copy/paste below. I
> review implementation to have _NextPower2 do all the algo.
>
>>
>> I don't think this needs to be a recursive template, it can simply be
>> a function, can't it?
> I wanted code to adapt to any sizeof(std::size_t) without relying on
> some preprocessor checks. As you pointed out additional >> 32 on 32 bits
> or >> 64 on 64 bits wouldn't hurt but the recursive template just make
> sure that we don't do useless operations.
>
>>
>>> +  /// Rehash policy providing power of 2 bucket numbers. Ease modulo
>>> +  /// operations.
>>> +  struct _Power2_rehash_policy
>>> +  {
>>> +    using __has_load_factor = std::true_type;
>>> +
>>> +    _Power2_rehash_policy(float __z = 1.0) noexcept
>>> +    : _M_max_load_factor(__z), _M_next_resize(0) { }
>>> +
>>> +    float
>>> +    max_load_factor() const noexcept
>>> +    { return _M_max_load_factor; }
>>> +
>>> +    // Return a bucket size no smaller than n (as long as n is not
>>> above the
>>> +    // highest power of 2).
>> This says "no smaller than n" but it actually seems to guarantee
>> "greater than n" because _NextPower2<>::_Get(n)+1 is 2n when n is a
>> power of two.
> yes but this function is calling _NextPower2<>::_Get(n - 1) + 1, there
> is a minus one which make this comment valid as shown by newly
> introduced test.
>
>>> +    std::size_t
>>> +    _M_next_bkt(std::size_t __n) const
>>> +    {
>>> +      constexpr auto __max_bkt
>>> +    = (std::size_t(1) << (sizeof(std::size_t) * 8 - 1));
>>> +
>>> +      std::size_t __res
>>> +    = _NextPower2<((sizeof(std::size_t) * 8) >> 1)>::_Get(--__n) + 1;
>> You wouldn't need to add one to the result if the template actually
>> returned a power of two!
>>
>>> +      if (__res == 0)
>>> +    __res = __max_bkt;
>>> +
>>> +      if (__res == __max_bkt)
>>> +    // Set next resize to the max value so that we never try to
>>> rehash again
>>> +    // as we already reach the biggest possible bucket number.
>>> +    // Note that it might result in max_load_factor not being
>>> respected.
>>> +    _M_next_resize = std::size_t(0) - 1;
>>> +      else
>>> +    _M_next_resize
>>> +      = __builtin_floor(__res * (long double)_M_max_load_factor);
>>> +
>>> +      return __res;
>>> +    }
>> What are the requirements for this function, "no smaller than n" or
>> "greater than n"?
> 'No smaller than n' like stated in the comment. However for big n it is
> not possible, even in the prime number based implementation. So I played
> with _M_next_resize to make sure that _M_next_bkt won't be called again
> as soon as the max bucket number has been reach.
>
>
>> If "no smaller than n" is correct then the algorithm you want is
>> "round up to nearest power of 2", which you can find here (I wrote
>> this earlier this year for some reason I can't remember now):
>>
>> https://gitlab.com/redistd/redistd/blob/master/include/redi/bits.h
>>
>> The non-recursive version is only a valid constexpr function in C++14,
>> but since you don't need a constexpr function you could just that,
>> extended to handle 64-bit:
>>
>>  std::size_t
>>  clp2(std::size_t n)
>>  {
>>    std::uint_least64_t x = n;
>>    // Algorithm from Hacker's Delight, Figure 3-3.
>>    x = x - 1;
>>    x = x | (x >> 1);
>>    x = x | (x >> 2);
>>    x = x | (x >> 4);
>>    x = x | (x >> 8);
>>    x = x | (x >>16);
>>    x = x | (x >>32);
>>    return x + 1;
>>  }
>>
>> We could avoid the last shift when sizeof(size_t) == 32, I don't know
>> if the optimisers will take care of that anyway.
> This is indeed the algo I found by myself and that I adapted to work
> with any sizeof(size_t).
>
> Do you prefer the new version or do you want to stick a more explicit
> version like the one you propose above. In this case is the last 32 bits
> shift enough ? No 128 bits platform yet ?
>
> François
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-10-19 20:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-09-10 20:11 New power of 2 hash policy François Dumont
2015-09-11 13:18 ` Michael Matz
2015-09-11 13:19   ` Jonathan Wakely
2015-09-11 13:28     ` Jonathan Wakely
2015-09-13 20:28       ` François Dumont
2015-09-24 20:58       ` François Dumont
2015-09-25 13:58         ` Jonathan Wakely
2015-09-28 19:39           ` François Dumont
2015-10-19 20:23             ` François Dumont

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).