From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <fdumont@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1720)
	id 4812A3858D1E; Sun, 31 Dec 2023 17:11:26 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4812A3858D1E
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org;
	s=default; t=1704042687;
	bh=gAkFwtU1//rTzSXFQou1kkn/diyS3Xi7LejHO5QVRu0=;
	h=From:To:Subject:Date:From;
	b=uawttT/arh5u0EdEav1yArDbj6/lecMbuIJ2WxgAyG4n3emTsnCKFjUUxQD7GRkha
	 dHxKowezs0lvsesbsvpn4yUuAJyyqh0A8VpuctfhjqD+p94MKR9sn5qQ2kDyP9q8xR
	 8auCqbte2wq+dmT7nuiyASyykWOala6iVR6aq9YM=
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset="utf-8"
From: Francois Dumont <fdumont@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org
Subject: [gcc r14-6872] libstdc++: [_Hashtable] Extend the small size
 optimization
X-Act-Checkin: gcc
X-Git-Author: =?utf-8?q?Fran=C3=A7ois_Dumont?= <fdumont@gcc.gnu.org>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 91b334d02772d0864c168353ccf92287b6bfefe7
X-Git-Newrev: 505110bb9139a97f77e577d2ab1d537628971c4b
Message-Id: <20231231171127.4812A3858D1E@sourceware.org>
Date: Sun, 31 Dec 2023 17:11:26 +0000 (GMT)
List-Id: <libstdc++-cvs.sourceware.org>

https://gcc.gnu.org/g:505110bb9139a97f77e577d2ab1d537628971c4b

commit r14-6872-g505110bb9139a97f77e577d2ab1d537628971c4b
Author: François Dumont <fdumont@gcc.gnu.org>
Date:   Wed Sep 27 06:53:51 2023 +0200

    libstdc++: [_Hashtable] Extend the small size optimization
    
    A number of methods were still not using the small size optimization which
    is to prefer an O(N) research to a hash computation as long as N is small.
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/hashtable.h: Move comment about all equivalent values
            being next to each other in the class documentation header.
            (_M_reinsert_node, _M_merge_unique): Implement small size optimization.
            (_M_find_tr, _M_count_tr, _M_equal_range_tr): Likewise.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h | 149 ++++++++++++++++++++++++++++------
 1 file changed, 123 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 9ff9104a2ab..9441fe1f91c 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -152,6 +152,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    *  _M_before_begin, if any, is updated to point to its new before
    *  begin node.
    *
+   *  Note that all equivalent values, if any, are next to each other, if
+   *  we find a non-equivalent value after an equivalent one it means that
+   *  we won't find any new equivalent value.
+   *
    *  On erase, the simple iterator design requires using the hash
    *  functor to get the index of the bucket to update. For this
    *  reason, when __cache_hash_code is set to false the hash functor must
@@ -1049,10 +1053,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  {
 	    __glibcxx_assert(get_allocator() == __nh.get_allocator());
 
+	    __node_ptr __n = nullptr;
 	    const key_type& __k = __nh._M_key();
-	    __hash_code __code = this->_M_hash_code(__k);
-	    size_type __bkt = _M_bucket_index(__code);
-	    if (__node_ptr __n = _M_find_node(__bkt, __k, __code))
+	    const size_type __size = size();
+	    if (__size <= __small_size_threshold())
+	      {
+		for (__n = _M_begin(); __n; __n = __n->_M_next())
+		  if (this->_M_key_equals(__k, *__n))
+		    break;
+	      }
+
+	    __hash_code __code;
+	    size_type __bkt;
+	    if (!__n)
+	      {
+		__code = this->_M_hash_code(__k);
+		__bkt = _M_bucket_index(__code);
+		if (__size > __small_size_threshold())
+		  __n = _M_find_node(__bkt, __k, __code);
+	      }
+
+	    if (__n)
 	      {
 		__ret.node = std::move(__nh);
 		__ret.position = iterator(__n);
@@ -1156,11 +1177,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
 	    {
 	      auto __pos = __i++;
+	      const size_type __size = size();
 	      const key_type& __k = _ExtractKey{}(*__pos);
+	      if (__size <= __small_size_threshold())
+		{
+		  bool __found = false;
+		  for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+		    if (this->_M_key_equals(__k, *__n))
+		      {
+			__found = true;
+			break;
+		      }
+
+		  if (__found)
+		    {
+		      if (__n_elt != 1)
+			--__n_elt;
+		      continue;
+		    }
+		}
+
 	      __hash_code __code
 		= _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur);
 	      size_type __bkt = _M_bucket_index(__code);
-	      if (_M_find_node(__bkt, __k, __code) == nullptr)
+	      if (__size <= __small_size_threshold()
+		  || _M_find_node(__bkt, __k, __code) == nullptr)
 		{
 		  auto __nh = __src.extract(__pos);
 		  _M_insert_unique_node(__bkt, __code, __nh._M_ptr, __n_elt);
@@ -1737,6 +1778,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k)
       -> iterator
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      if (this->_M_key_equals_tr(__k, *__n))
+		return iterator(__n);
+	    return end();
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	return iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1753,6 +1802,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k) const
       -> const_iterator
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      if (this->_M_key_equals_tr(__k, *__n))
+		return const_iterator(__n);
+	    return end();
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	return const_iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1776,9 +1833,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return 1;
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       size_type __result = 1;
       for (auto __ref = __it++;
 	   __it._M_cur && this->_M_node_equals(*__ref._M_cur, *__it._M_cur);
@@ -1800,15 +1854,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_count_tr(const _Kt& __k) const
       -> size_type
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    size_type __result = 0;
+	    for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    ++__result;
+		    continue;
+		  }
+
+		if (__result)
+		  break;
+	      }
+
+	    return __result;
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
 	if (!__n)
 	  return 0;
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	iterator __it(__n);
 	size_type __result = 1;
 	for (++__it;
@@ -1838,9 +1907,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, *__ite._M_cur))
 	++__ite;
 
@@ -1865,9 +1931,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
 	return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, *__ite._M_cur))
 	++__ite;
 
@@ -1886,6 +1949,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k)
       -> pair<iterator, iterator>
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    __node_ptr __n, __beg = nullptr;
+	    for (__n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    if (!__beg)
+		      __beg = __n;
+		    continue;
+		  }
+
+		if (__beg)
+		  break;
+	      }
+
+	    return { iterator(__beg), iterator(__n) };
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1893,9 +1975,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (!__n)
 	  return { __ite, __ite };
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	auto __beg = __ite++;
 	while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
 	  ++__ite;
@@ -1914,6 +1993,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k) const
       -> pair<const_iterator, const_iterator>
       {
+	if (size() <= __small_size_threshold())
+	  {
+	    __node_ptr __n, __beg = nullptr;
+	    for (__n = _M_begin(); __n; __n = __n->_M_next())
+	      {
+		if (this->_M_key_equals_tr(__k, *__n))
+		  {
+		    if (!__beg)
+		      __beg = __n;
+		    continue;
+		  }
+
+		if (__beg)
+		  break;
+	      }
+
+	    return { const_iterator(__beg), const_iterator(__n) };
+	  }
+
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	std::size_t __bkt = _M_bucket_index(__code);
 	auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1921,9 +2019,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (!__n)
 	  return { __ite, __ite };
 
-	// All equivalent values are next to each other, if we find a
-	// non-equivalent value after an equivalent one it means that we won't
-	// find any new equivalent value.
 	auto __beg = __ite++;
 	while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
 	  ++__ite;
@@ -2052,7 +2147,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	// First build the node to get access to the hash code
 	_Scoped_node __node { this, std::forward<_Args>(__args)...  };
 	const key_type& __k = _ExtractKey{}(__node._M_node->_M_v());
-	if (size() <= __small_size_threshold())
+	const size_type __size = size();
+	if (__size <= __small_size_threshold())
 	  {
 	    for (auto __it = _M_begin(); __it; __it = __it->_M_next())
 	      if (this->_M_key_equals(__k, *__it))
@@ -2062,7 +2158,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 	__hash_code __code = this->_M_hash_code(__k);
 	size_type __bkt = _M_bucket_index(__code);
-	if (size() > __small_size_threshold())
+	if (__size > __small_size_threshold())
 	  if (__node_ptr __p = _M_find_node(__bkt, __k, __code))
 	    // There is already an equivalent node, no insertion
 	    return { iterator(__p), false };
@@ -2225,7 +2321,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		       const _NodeGenerator& __node_gen)
       -> pair<iterator, bool>
       {
-	if (size() <= __small_size_threshold())
+	const size_type __size = size();
+	if (__size <= __small_size_threshold())
 	  for (auto __it = _M_begin(); __it; __it = __it->_M_next())
 	    if (this->_M_key_equals_tr(__k, *__it))
 	      return { iterator(__it), false };
@@ -2233,7 +2330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__hash_code __code = this->_M_hash_code_tr(__k);
 	size_type __bkt = _M_bucket_index(__code);
 
-	if (size() > __small_size_threshold())
+	if (__size > __small_size_threshold())
 	  if (__node_ptr __node = _M_find_node_tr(__bkt, __k, __code))
 	    return { iterator(__node), false };