public inbox for libstdc++-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/wschmidt/heads/builtins3)] libstdc++: Do not over-size hashtable buckets on range insertion
@ 2020-08-18 18:12 William Schmidt
  0 siblings, 0 replies; only message in thread
From: William Schmidt @ 2020-08-18 18:12 UTC (permalink / raw)
  To: gcc-cvs, libstdc++-cvs

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 17331 bytes --]

https://gcc.gnu.org/g:6dcf042368012e2d7ce1626ee5d378bf3ad0ccfc

commit 6dcf042368012e2d7ce1626ee5d378bf3ad0ccfc
Author: François Dumont <fdumont@gcc.gnu.org>
Date:   Mon Jan 20 19:01:18 2020 +0100

    libstdc++: Do not over-size hashtable buckets on range insertion
    
    We used to consider range size on insertion but on unique keys container
    not all range values might be inserted resulting in over-sizing. In this
    case we just consider user reservation and if none then the container will
    adapt to actually inserted elements.
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/hashtable.h
            (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&,
            const _H2&, const _Hash&, const _Equal&, const _ExtractKey&,
            const allocator_type&, true_type)): New.
            (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&,
            const _H2&, const _Hash&, const _Equal&, const _ExtractKey&,
            const allocator_type&, false_type)): New.
            (_Hashtable<>(_InputIterator, _InputIterator, size_t, const _H1&,
            const _H2&, const _Hash&, const _Equal&, const _ExtractKey&,
            const allocator_type&)): Delegate to latters.
            (operator=(initializer_list<value_type>)): Rehash if too small.
            (_M_insert(_Arg&&, const _NodeGenerator&, true_type)): Remove
            size_t len parameter.
            * include/bits/hashtable_policy.h (_Insert_base<>::_M_insert_range):
            Do not try to get input range distance.
            * testsuite/23_containers/unordered_set/cons/bucket_hint.cc: New test.
            * testsuite/23_containers/unordered_set/modifiers/insert.cc: New test.

Diff:
---
 libstdc++-v3/include/bits/hashtable.h              | 90 ++++++++++++++++------
 libstdc++-v3/include/bits/hashtable_policy.h       | 36 ++++-----
 .../unordered_set/cons/bucket_hint.cc              | 63 +++++++++++++++
 .../unordered_set/modifiers/insert.cc              | 66 ++++++++++++++++
 4 files changed, 210 insertions(+), 45 deletions(-)

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 248dd62589b..9d1ad592553 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -460,6 +460,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__hashtable_alloc(__node_alloc_type(__a))
       { }
 
+      template<typename _InputIterator>
+	_Hashtable(_InputIterator __first, _InputIterator __last,
+		   size_type __bkt_count_hint,
+		   const _H1&, const _H2&, const _Hash&,
+		   const _Equal&, const _ExtractKey&,
+		   const allocator_type&,
+		   true_type __uks);
+
+      template<typename _InputIterator>
+	_Hashtable(_InputIterator __first, _InputIterator __last,
+		   size_type __bkt_count_hint,
+		   const _H1&, const _H2&, const _Hash&,
+		   const _Equal&, const _ExtractKey&,
+		   const allocator_type&,
+		   false_type __uks);
+
     public:
       // Constructor, destructor, assignment, swap
       _Hashtable() = default;
@@ -468,13 +484,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		 const _Equal&, const _ExtractKey&,
 		 const allocator_type&);
 
-      template<typename _InputIterator>
-	_Hashtable(_InputIterator __first, _InputIterator __last,
-		   size_type __bkt_count_hint,
-		   const _H1&, const _H2&, const _Hash&,
-		   const _Equal&, const _ExtractKey&,
-		   const allocator_type&);
-
       _Hashtable(const _Hashtable&);
 
       _Hashtable(_Hashtable&&) noexcept;
@@ -484,6 +493,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Hashtable(_Hashtable&&, const allocator_type&);
 
       // Use delegating constructors.
+      template<typename _InputIterator>
+	_Hashtable(_InputIterator __first, _InputIterator __last,
+		   size_type __bkt_count_hint,
+		   const _H1& __h1, const _H2& __h2, const _Hash& __h,
+		   const _Equal& __eq, const _ExtractKey& __exk,
+		   const allocator_type& __a)
+	: _Hashtable(__first, __last, __bkt_count_hint,
+		     __h1, __h2, __h, __eq, __exk, __a, __unique_keys{})
+	{ }
+
       explicit
       _Hashtable(const allocator_type& __a)
       : __hashtable_alloc(__node_alloc_type(__a))
@@ -540,7 +559,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	__reuse_or_alloc_node_gen_t __roan(_M_begin(), *this);
 	_M_before_begin._M_nxt = nullptr;
 	clear();
-	this->_M_insert_range(__l.begin(), __l.end(), __roan, __unique_keys());
+
+	// We consider that all elements of __l are going to be inserted.
+	auto __l_bkt_count = _M_rehash_policy._M_bkt_for_elements(__l.size());
+
+	// Do not shrink to keep potential user reservation.
+	if (_M_bucket_count < __l_bkt_count)
+	  rehash(__l_bkt_count);
+
+	this->_M_insert_range(__l.begin(), __l.end(), __roan, __unique_keys{});
 	return *this;
       }
 
@@ -749,41 +776,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // Emplace with hint, useless when keys are unique.
       template<typename... _Args>
 	iterator
-	_M_emplace(const_iterator, true_type __uk, _Args&&... __args)
-	{ return _M_emplace(__uk, std::forward<_Args>(__args)...).first; }
+	_M_emplace(const_iterator, true_type __uks, _Args&&... __args)
+	{ return _M_emplace(__uks, std::forward<_Args>(__args)...).first; }
 
       template<typename... _Args>
 	iterator
-	_M_emplace(const_iterator, false_type, _Args&&... __args);
+	_M_emplace(const_iterator, false_type __uks, _Args&&... __args);
 
       template<typename _Arg, typename _NodeGenerator>
 	std::pair<iterator, bool>
-	_M_insert(_Arg&&, const _NodeGenerator&, true_type, size_type = 1);
+	_M_insert(_Arg&&, const _NodeGenerator&, true_type __uks);
 
       template<typename _Arg, typename _NodeGenerator>
 	iterator
 	_M_insert(_Arg&& __arg, const _NodeGenerator& __node_gen,
-		  false_type __uk)
+		  false_type __uks)
 	{
 	  return _M_insert(cend(), std::forward<_Arg>(__arg), __node_gen,
-			   __uk);
+			   __uks);
 	}
 
       // Insert with hint, not used when keys are unique.
       template<typename _Arg, typename _NodeGenerator>
 	iterator
 	_M_insert(const_iterator, _Arg&& __arg,
-		  const _NodeGenerator& __node_gen, true_type __uk)
+		  const _NodeGenerator& __node_gen, true_type __uks)
 	{
 	  return
-	    _M_insert(std::forward<_Arg>(__arg), __node_gen, __uk).first;
+	    _M_insert(std::forward<_Arg>(__arg), __node_gen, __uks).first;
 	}
 
       // Insert with hint when keys are not unique.
       template<typename _Arg, typename _NodeGenerator>
 	iterator
 	_M_insert(const_iterator, _Arg&&,
-		  const _NodeGenerator&, false_type);
+		  const _NodeGenerator&, false_type __uks);
 
       size_type
       _M_erase(true_type, const key_type&);
@@ -1032,7 +1059,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		 size_type __bkt_count_hint,
 		 const _H1& __h1, const _H2& __h2, const _Hash& __h,
 		 const _Equal& __eq, const _ExtractKey& __exk,
-		 const allocator_type& __a)
+		 const allocator_type& __a, true_type /* __uks */)
+      : _Hashtable(__bkt_count_hint, __h1, __h2, __h, __eq, __exk, __a)
+      {
+	for (; __f != __l; ++__f)
+	  this->insert(*__f);
+      }
+
+  template<typename _Key, typename _Value,
+	   typename _Alloc, typename _ExtractKey, typename _Equal,
+	   typename _H1, typename _H2, typename _Hash, typename _RehashPolicy,
+	   typename _Traits>
+    template<typename _InputIterator>
+      _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+		 _H1, _H2, _Hash, _RehashPolicy, _Traits>::
+      _Hashtable(_InputIterator __f, _InputIterator __l,
+		 size_type __bkt_count_hint,
+		 const _H1& __h1, const _H2& __h2, const _Hash& __h,
+		 const _Equal& __eq, const _ExtractKey& __exk,
+		 const allocator_type& __a, false_type /* __uks */)
       : _Hashtable(__h1, __h2, __h, __eq, __exk, __a)
       {
 	auto __nb_elems = __detail::__distance_fw(__f, __l);
@@ -1802,8 +1847,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       auto
       _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 		 _H1, _H2, _Hash, _RehashPolicy, _Traits>::
-      _M_insert(_Arg&& __v, const _NodeGenerator& __node_gen, true_type,
-		size_type __n_elt)
+      _M_insert(_Arg&& __v, const _NodeGenerator& __node_gen,
+		true_type /* __uks */)
       -> pair<iterator, bool>
       {
 	const key_type& __k = this->_M_extract()(__v);
@@ -1815,7 +1860,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 	_Scoped_node __node{ __node_gen(std::forward<_Arg>(__v)), this };
 	auto __pos
-	  = _M_insert_unique_node(__k, __bkt, __code, __node._M_node, __n_elt);
+	  = _M_insert_unique_node(__k, __bkt, __code, __node._M_node);
 	__node._M_node = nullptr;
 	return { __pos, true };
       }
@@ -1830,7 +1875,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
 		 _H1, _H2, _Hash, _RehashPolicy, _Traits>::
       _M_insert(const_iterator __hint, _Arg&& __v,
-		const _NodeGenerator& __node_gen, false_type)
+		const _NodeGenerator& __node_gen,
+		false_type /* __uks */)
       -> iterator
       {
 	// First compute the hash code so that we don't do anything if it
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index a91ae21a906..26ea9bf29ce 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -820,12 +820,12 @@ namespace __detail
       template<typename _InputIterator, typename _NodeGetter>
 	void
 	_M_insert_range(_InputIterator __first, _InputIterator __last,
-			const _NodeGetter&, true_type);
+			const _NodeGetter&, true_type __uks);
 
       template<typename _InputIterator, typename _NodeGetter>
 	void
 	_M_insert_range(_InputIterator __first, _InputIterator __last,
-			const _NodeGetter&, false_type);
+			const _NodeGetter&, false_type __uks);
 
     public:
       __ireturn_type
@@ -833,7 +833,7 @@ namespace __detail
       {
 	__hashtable& __h = _M_conjure_hashtable();
 	__node_gen_type __node_gen(__h);
-	return __h._M_insert(__v, __node_gen, __unique_keys());
+	return __h._M_insert(__v, __node_gen, __unique_keys{});
       }
 
       iterator
@@ -841,7 +841,7 @@ namespace __detail
       {
 	__hashtable& __h = _M_conjure_hashtable();
 	__node_gen_type __node_gen(__h);	
-	return __h._M_insert(__hint, __v, __node_gen, __unique_keys());
+	return __h._M_insert(__hint, __v, __node_gen, __unique_keys{});
       }
 
       template<typename _KType, typename... _Args>
@@ -876,7 +876,7 @@ namespace __detail
 	{
 	  __hashtable& __h = _M_conjure_hashtable();
 	  __node_gen_type __node_gen(__h);
-	  return _M_insert_range(__first, __last, __node_gen, __unique_keys());
+	  return _M_insert_range(__first, __last, __node_gen, __unique_keys{});
 	}
     };
 
@@ -889,21 +889,11 @@ namespace __detail
       _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
 		    _RehashPolicy, _Traits>::
       _M_insert_range(_InputIterator __first, _InputIterator __last,
-		      const _NodeGetter& __node_gen, true_type)
+		      const _NodeGetter& __node_gen, true_type __uks)
       {
-	size_type __n_elt = __detail::__distance_fw(__first, __last);
-	if (__n_elt == 0)
-	  return;
-
 	__hashtable& __h = _M_conjure_hashtable();
 	for (; __first != __last; ++__first)
-	  {
-	    if (__h._M_insert(*__first, __node_gen, __unique_keys(),
-			      __n_elt).second)
-	      __n_elt = 1;
-	    else if (__n_elt != 1)
-	      --__n_elt;
-	  }
+	  __h._M_insert(*__first, __node_gen, __uks);
       }
 
   template<typename _Key, typename _Value, typename _Alloc,
@@ -915,7 +905,7 @@ namespace __detail
       _Insert_base<_Key, _Value, _Alloc, _ExtractKey, _Equal, _H1, _H2, _Hash,
 		    _RehashPolicy, _Traits>::
       _M_insert_range(_InputIterator __first, _InputIterator __last,
-		      const _NodeGetter& __node_gen, false_type)
+		      const _NodeGetter& __node_gen, false_type __uks)
       {
 	using __rehash_type = typename __hashtable::__rehash_type;
 	using __rehash_state = typename __hashtable::__rehash_state;
@@ -936,7 +926,7 @@ namespace __detail
 	  __h._M_rehash(__do_rehash.second, __saved_state);
 
 	for (; __first != __last; ++__first)
-	  __h._M_insert(*__first, __node_gen, __unique_keys());
+	  __h._M_insert(*__first, __node_gen, __uks);
       }
 
   /**
@@ -986,7 +976,7 @@ namespace __detail
       {
 	__hashtable& __h = this->_M_conjure_hashtable();
 	__node_gen_type __node_gen(__h);
-	return __h._M_insert(std::move(__v), __node_gen, __unique_keys());
+	return __h._M_insert(std::move(__v), __node_gen, __unique_keys{});
       }
 
       iterator
@@ -995,7 +985,7 @@ namespace __detail
 	__hashtable& __h = this->_M_conjure_hashtable();
 	__node_gen_type __node_gen(__h);
 	return __h._M_insert(__hint, std::move(__v), __node_gen,
-			     __unique_keys());
+			     __unique_keys{});
       }
     };
 
@@ -1036,7 +1026,7 @@ namespace __detail
 	insert(_Pair&& __v)
 	{
 	  __hashtable& __h = this->_M_conjure_hashtable();
-	  return __h._M_emplace(__unique_keys(), std::forward<_Pair>(__v));
+	  return __h._M_emplace(__unique_keys{}, std::forward<_Pair>(__v));
 	}
 
       template<typename _Pair, typename = _IFconsp<_Pair>>
@@ -1044,7 +1034,7 @@ namespace __detail
 	insert(const_iterator __hint, _Pair&& __v)
 	{
 	  __hashtable& __h = this->_M_conjure_hashtable();
-	  return __h._M_emplace(__hint, __unique_keys(),
+	  return __h._M_emplace(__hint, __unique_keys{},
 				std::forward<_Pair>(__v));
 	}
    };
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc
new file mode 100644
index 00000000000..100eb5a27df
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/cons/bucket_hint.cc
@@ -0,0 +1,63 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <vector>
+#include <forward_list>
+#include <unordered_set>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  std::unordered_set<int> b({ 0, 1, 0, 1, 0, 1, 0, 1 }, a.bucket_count());
+  VERIFY( b.bucket_count() == a.bucket_count() );
+}
+
+void test02()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  std::vector<int> v { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+
+  std::unordered_set<int> b(v.begin(), v.end(), a.bucket_count());
+  VERIFY( b.bucket_count() == a.bucket_count() );
+}
+
+void test03()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  std::forward_list<int> fl { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+
+  std::unordered_set<int> b(fl.begin(), fl.end(), a.bucket_count());
+  VERIFY( b.bucket_count() == a.bucket_count() );
+}
+
+int main()
+{
+  test01();
+  test02();
+  test03();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc
new file mode 100644
index 00000000000..acf01c73b1b
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/insert.cc
@@ -0,0 +1,66 @@
+// { dg-do run { target c++11 } }
+
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <vector>
+#include <forward_list>
+#include <unordered_set>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  auto bkt_count = a.bucket_count();
+  a.insert({ 0, 1, 0, 1, 0, 1, 0, 1 });
+  VERIFY( a.bucket_count() == bkt_count );
+}
+
+void test02()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  std::vector<int> v { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+
+  auto bkt_count = a.bucket_count();
+  a.insert(v.begin(), v.end());
+  VERIFY( a.bucket_count() == bkt_count );
+}
+
+void test03()
+{
+  std::unordered_set<int> a;
+  a.reserve(2);
+
+  std::forward_list<int> fl { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+
+  auto bkt_count = a.bucket_count();
+  a.insert(fl.begin(), fl.end());
+  VERIFY( a.bucket_count() == bkt_count );
+}
+
+int main()
+{
+  test01();
+  test02();
+  test03();
+  return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-18 18:12 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-18 18:12 [gcc(refs/users/wschmidt/heads/builtins3)] libstdc++: Do not over-size hashtable buckets on range insertion William Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).