public inbox for libstdc++@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR 83938 Reduce memory consumption in stable_sort/inplace_merge
@ 2020-06-11  6:32 François Dumont
  2020-06-22  6:27 ` François Dumont
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: François Dumont @ 2020-06-11  6:32 UTC (permalink / raw)
  To: libstdc++, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2124 bytes --]

As we are on patching algos we still have this old one.

     From the original patch I only kept the memory optimization part as 
the new performance test was not showing good result for the other part 
to change pivot value. I also kept the small change in 
get_temporary_buffer even if I don't have strong feeling about it, it 
just make sure that we'll try to allocate 1 element as a last chance 
allocation.

     Note that there is still place for an improvement. If we miss 
memory on the heap we then use a recursive implementation which then 
rely on stack memory. I would be surprise that a system which miss heap 
memory would have no problem to allocate about the same on the stack so 
we will surely end up in a stack overflow. I still have this on my todo 
even if I already made several tries with no satisfying result in terms 
of performance.

     Tested under Linux x86_64.

Commit message:

     libstdc++: Limit memory allocation in stable_sort/inplace_merge (PR 
83938)

     Reduce memory consumption in stable_sort/inplace_merge to what is used.

     Co-authored-by: François Dumont  <fdumont@gcc.gnu.org>

     libstdc++-v3/ChangeLog:

     2020-06-11  John Chang  <john.chang@samba.tv>
                 François Dumont  <fdumont@gcc.gnu.org>

             PR libstdc++/83938
             * include/bits/stl_tempbuf.h (get_temporary_buffer): Change 
__len
             computation in the loop.
             * include/bits/stl_algo.h:
             (__inplace_merge): Take temporary buffer length from 
smallest range.
             (__stable_sort): Limit temporary buffer length.
             * testsuite/25_algorithms/inplace_merge/1.cc (test03): Test 
different
             pivot positions.
             * testsuite/performance/25_algorithms/stable_sort.cc: Test 
stable_sort
             under different heap memory conditions.
             * testsuite/performance/25_algorithms/inplace_merge.cc: New.

Ok to commit ?

François


[-- Attachment #2: pr83938.patch --]
[-- Type: text/x-patch, Size: 16082 bytes --]

diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h
index fd6edd0d5f4..44798ffaa7f 100644
--- a/libstdc++-v3/include/bits/stl_algo.h
+++ b/libstdc++-v3/include/bits/stl_algo.h
@@ -2531,7 +2531,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       const _DistanceType __len2 = std::distance(__middle, __last);
 
       typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
-      _TmpBuf __buf(__first, __len1 + __len2);
+      _TmpBuf __buf(__first, std::min(__len1, __len2));
 
       if (__buf.begin() == 0)
 	std::__merge_without_buffer
@@ -2740,6 +2740,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp);
 	  std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp);
 	}
+
       std::__merge_adaptive(__first, __middle, __last,
 			    _Distance(__middle - __first),
 			    _Distance(__last - __middle),
@@ -5006,8 +5007,11 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
       typedef typename iterator_traits<_RandomAccessIterator>::difference_type
 	_DistanceType;
 
+      if (__first == __last)
+	return;
+
       typedef _Temporary_buffer<_RandomAccessIterator, _ValueType> _TmpBuf;
-      _TmpBuf __buf(__first, std::distance(__first, __last));
+      _TmpBuf __buf(__first, (__last - __first + 1) / 2);
 
       if (__buf.begin() == 0)
 	std::__inplace_stable_sort(__first, __last, __comp);
diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h b/libstdc++-v3/include/bits/stl_tempbuf.h
index f6f17960472..d76ed7f7ea6 100644
--- a/libstdc++-v3/include/bits/stl_tempbuf.h
+++ b/libstdc++-v3/include/bits/stl_tempbuf.h
@@ -110,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 							std::nothrow));
 	  if (__tmp != 0)
 	    return std::pair<_Tp*, ptrdiff_t>(__tmp, __len);
-	  __len /= 2;
+	  __len = __len == 1 ? 0 : ((__len + 1) / 2);
 	}
       return std::pair<_Tp*, ptrdiff_t>(static_cast<_Tp*>(0), 0);
     }
diff --git a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
index 5859f0363d5..7f78687d0aa 100644
--- a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc
@@ -28,7 +28,7 @@ using std::inplace_merge;
 
 typedef test_container<int, bidirectional_iterator_wrapper> container;
 
-void 
+void
 test1()
 {
   int array[] = { 1 };
@@ -39,7 +39,7 @@ test1()
   inplace_merge(con2.begin(), con2.begin(), con2.end());
 }
 
-void 
+void
 test2()
 {
   int array[] = { 0, 2, 4, 1, 3, 5 };
@@ -60,30 +60,34 @@ struct S
   { return a < _s.a; }
 };
 
-void 
+void
 test3()
 {
   S s[8];
-  s[0].a = 0;
-  s[1].a = 1;
-  s[2].a = 2;
-  s[3].a = 3;
-  s[4].a = 0;
-  s[5].a = 1;
-  s[6].a = 2;
-  s[7].a = 3;
-
-  s[0].b = 0;
-  s[1].b = 1;
-  s[2].b = 2;
-  s[3].b = 3;
-  s[4].b = 4;
-  s[5].b = 5;
-  s[6].b = 6;
-  s[7].b = 7;
-
-  inplace_merge(s, s + 4, s + 8);
-  VERIFY( s[0].b == 0 && s[1].b == 4 && s[2].b == 1 && s[3].b == 5 );
+  for (int pivot_idx = 0; pivot_idx < 8; ++pivot_idx)
+    {
+      int bval = 0;
+      for (int i = 0; i != pivot_idx; ++i)
+	{
+	  s[i].a = i;
+	  s[i].b = bval++;
+	}
+
+      for (int i = pivot_idx; i != 8; ++i)
+	{
+	  s[i].a = i - pivot_idx;
+	  s[i].b = bval++;
+	}
+
+      inplace_merge(s, s + pivot_idx, s + 8);
+
+      for (int i = 1; i < 8; ++i)
+	{
+	  VERIFY( !(s[i] < s[i - 1]) );
+	  if (s[i - 1].a == s[i].a)
+	    VERIFY( s[i - 1].b < s[i].b );
+	}
+    }
 }
 
 int 
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
new file mode 100644
index 00000000000..780c21912c7
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc
@@ -0,0 +1,290 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <vector>
+#include <algorithm>
+#include <cmath>
+
+#include <testsuite_new_operators.h>
+#include <testsuite_performance.h>
+
+const int max_size = 10000000;
+const int small_size = 200000;
+const int front_pivot_idx = 10000;
+int middle_pivot_idx = max_size / 2;
+int back_pivot_idx = max_size - front_pivot_idx;
+
+void bench(int mem_threshold, int pivot_index,
+	   std::vector<int> revv,
+	   std::vector<int> fwdv,
+	   std::vector<int> wstv,
+	   std::vector<int> rndv)
+{
+  using namespace __gnu_test;
+
+  time_counter time;
+  resource_counter resource;
+
+  set_new_limit(mem_threshold);
+
+  start_counters(time, resource);
+  std::inplace_merge(revv.begin(), revv.begin() + pivot_index, revv.end());
+  stop_counters(time, resource);
+
+  set_new_limit(~size_t(0));
+
+  report_performance(__FILE__, "reverse", time, resource);
+  clear_counters(time, resource);
+
+  set_new_limit(mem_threshold);
+
+  start_counters(time, resource);
+  std::inplace_merge(fwdv.begin(), fwdv.begin() + pivot_index, fwdv.end());
+  stop_counters(time, resource);
+
+  set_new_limit(~size_t(0));
+
+  report_performance(__FILE__, "forward", time, resource);
+  clear_counters(time, resource);
+
+  set_new_limit(mem_threshold);
+
+  start_counters(time, resource);
+  std::inplace_merge(wstv.begin(), wstv.begin() + pivot_index, wstv.end());
+  stop_counters(time, resource);
+
+  set_new_limit(~size_t(0));
+
+  report_performance(__FILE__, "worst", time, resource);
+  clear_counters(time, resource);
+
+  set_new_limit(mem_threshold);
+
+  start_counters(time, resource);
+  std::inplace_merge(rndv.begin(), rndv.begin() + pivot_index, rndv.end());
+  stop_counters(time, resource);
+
+  set_new_limit(~size_t(0));
+  report_performance(__FILE__, "random", time, resource);
+}
+
+void mem_bench(double mem_ratio,
+	       const std::vector<int>& front_revv,
+	       const std::vector<int>& middle_revv,
+	       const std::vector<int>& back_revv,
+	       const std::vector<int>& fwdv,
+	       const std::vector<int>& front_wstv,
+	       const std::vector<int>& middle_wstv,
+	       const std::vector<int>& back_wstv,
+	       const std::vector<int>& front_rndv,
+	       const std::vector<int>& middle_rndv,
+	       const std::vector<int>& back_rndv)
+{
+  using namespace __gnu_test;
+
+  time_counter time;
+  resource_counter resource;
+
+  int max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
+  start_counters(time, resource);
+  bench(max_mem, front_pivot_idx, front_revv, fwdv, front_wstv, front_rndv);
+  stop_counters(time, resource);
+  report_performance(__FILE__, "front pivot", time, resource);
+  clear_counters(time, resource);
+
+  max_mem = (int)std::ceil(middle_pivot_idx * mem_ratio) * sizeof(int);
+  start_counters(time, resource);
+  bench(max_mem, middle_pivot_idx, middle_revv, fwdv, middle_wstv, middle_rndv);
+  stop_counters(time, resource);
+  report_performance(__FILE__, "middle pivot", time, resource);
+  clear_counters(time, resource);
+
+  max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
+  start_counters(time, resource);
+  bench(max_mem, back_pivot_idx, back_revv, fwdv, back_wstv, back_rndv);
+  stop_counters(time, resource);
+  report_performance(__FILE__, "back pivot", time, resource);
+}
+
+void init_reverse(std::vector<int>& v, size_t pivot_index)
+{
+  int val = 0;
+  for (size_t i = pivot_index; i != v.size(); ++i)
+    v[i] = val++;
+  for (size_t i = 0; i != pivot_index; ++i)
+    v[i] = val++;
+}
+
+void init_forward(std::vector<int>& v)
+{
+  int val = 0;
+  for (size_t i = 0; i != v.size(); ++i)
+    v[i] = val++;
+}
+
+void init_worst(std::vector<int>& v, size_t pivot_index)
+{
+  int val = 0;
+  if (pivot_index + 1 > v.size() / 2)
+    {
+      for (size_t i = 0; i != pivot_index; val += 2, ++i)
+	v[i] = val;
+      val = 1;
+    }
+  else
+    {
+      for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
+	v[i] = val;
+      val -= pivot_index * 2 + 1;
+    }
+
+  if (pivot_index + 1 > v.size() / 2)
+    for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
+      v[i] = val;
+  else
+    for (size_t i = 0; i != pivot_index; val += 2, ++i)
+      v[i] = val;
+}
+
+void init_random(std::vector<int>& v)
+{
+  // a simple pseudo-random series which does not rely on rand() and friends
+  v[0] = 0;
+  for (size_t i = 1; i != v.size(); ++i)
+    v[i] = (v[i-1] + 110211473) * 745988807;
+}
+
+void reduce_size(std::vector<int>& front_v,
+		 std::vector<int>& middle_v,
+		 std::vector<int>& back_v)
+{
+  front_v.erase(front_v.begin() + front_pivot_idx,
+		front_v.end() - back_pivot_idx);
+  middle_v.erase(middle_v.begin() + small_size / 2,
+		 middle_v.end() - small_size / 2);
+  back_v.erase(back_v.begin() + back_pivot_idx,
+	       back_v.end() - front_pivot_idx);
+}
+
+int main()
+{
+  using namespace __gnu_test;
+
+  // No constraint to build vectors.
+  set_new_limit(~size_t(0));
+
+  std::vector<int> front_revv(max_size);
+  init_reverse(front_revv, front_pivot_idx);
+
+  std::vector<int> middle_revv(max_size);
+  init_reverse(middle_revv, middle_pivot_idx);
+
+  std::vector<int> back_revv(max_size);
+  init_reverse(back_revv, back_pivot_idx);
+
+  std::vector<int> fwdv(max_size);
+  init_forward(fwdv);
+
+  std::vector<int> front_wstv(max_size);
+  init_worst(front_wstv, front_pivot_idx);
+
+  std::vector<int> middle_wstv(max_size);
+  init_worst(middle_wstv, middle_pivot_idx);
+
+  std::vector<int> back_wstv(max_size);
+  init_worst(back_wstv, back_pivot_idx);
+
+  std::vector<int> front_rndv(max_size);
+  init_random(front_rndv);
+  std::vector<int> middle_rndv(front_rndv);
+  std::vector<int> back_rndv(front_rndv);
+
+  sort(front_rndv.begin(), front_rndv.begin() + front_pivot_idx);
+  sort(front_rndv.begin() + front_pivot_idx, front_rndv.end());
+
+  sort(middle_rndv.begin(), middle_rndv.begin() + middle_pivot_idx);
+  sort(middle_rndv.begin() + middle_pivot_idx, middle_rndv.end());
+
+  sort(back_rndv.begin(), back_rndv.begin() + back_pivot_idx);
+  sort(back_rndv.begin() + back_pivot_idx, back_rndv.end());
+
+  time_counter time;
+  resource_counter resource;
+
+  start_counters(time, resource);
+
+  // No limit.
+  mem_bench(1.0,
+	    front_revv, middle_revv, back_revv,
+	    fwdv,
+	    front_wstv, middle_wstv, back_wstv,
+	    front_rndv, middle_rndv, back_rndv);
+
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
+  clear_counters(time, resource);
+
+  start_counters(time, resource);
+
+  // Limit to the fourth.
+  mem_bench(1.0 / 4,
+	    front_revv, middle_revv, back_revv,
+	    fwdv,
+	    front_wstv, middle_wstv, back_wstv,
+	    front_rndv, middle_rndv, back_rndv);
+
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
+  clear_counters(time, resource);
+
+  start_counters(time, resource);
+
+  // Really limit allocation.
+  mem_bench(1.0 / 64,
+	    front_revv, middle_revv, back_revv,
+	    fwdv,
+	    front_wstv, middle_wstv, back_wstv,
+	    front_rndv, middle_rndv, back_rndv);
+
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 1 /64 memory", time, resource);
+  clear_counters(time, resource);
+
+  middle_pivot_idx = small_size / 2;
+  back_pivot_idx = small_size - front_pivot_idx;
+  reduce_size(front_revv, middle_revv, back_revv);
+  fwdv.resize(small_size);
+  reduce_size(front_wstv, middle_wstv, back_wstv);
+  reduce_size(front_rndv, middle_rndv, back_rndv);
+
+  start_counters(time, resource);
+
+  // No memory.
+  mem_bench(0.0,
+	    front_revv, middle_revv, back_revv,
+	    fwdv,
+	    front_wstv, middle_wstv, back_wstv,
+	    front_rndv, middle_rndv, back_rndv);
+
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
index 02b869dd195..fe526638aaf 100644
--- a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
+++ b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc
@@ -17,49 +17,107 @@
 
 #include <vector>
 #include <algorithm>
+
+#include <testsuite_new_operators.h>
 #include <testsuite_performance.h>
 
-int main()
+const int max_size = 10000000;
+const int small_size = 200000;
+
+void bench(size_t mem_threshold,
+	   std::vector<int> revv,
+	   std::vector<int> fwdv,
+	   std::vector<int> rndv)
 {
   using namespace __gnu_test;
 
   time_counter time;
   resource_counter resource;
 
-  const int max_size = 10000000;
-
-  std::vector<int> v(max_size);
-
-  for (int i = 0; i < max_size; ++i)
-    v[i] = -i;
+  set_new_limit(mem_threshold);
 
   start_counters(time, resource);
-  std::stable_sort(v.begin(), v.end());
+  std::stable_sort(revv.begin(), revv.end());
   stop_counters(time, resource);
 
+  set_new_limit(~size_t(0));
   report_performance(__FILE__, "reverse", time, resource);
   clear_counters(time, resource);
 
-  for (int i = 0; i < max_size; ++i)
-    v[i] = i;
+  set_new_limit(mem_threshold);
 
   start_counters(time, resource);
-  std::stable_sort(v.begin(), v.end());
+  std::stable_sort(fwdv.begin(), fwdv.end());
   stop_counters(time, resource);
 
+  set_new_limit(~size_t(0));
   report_performance(__FILE__, "forwards", time, resource);
   clear_counters(time, resource);
 
-  // a simple psuedo-random series which does not rely on rand() and friends
-  v[0] = 0;
+  start_counters(time, resource);
+  std::stable_sort(rndv.begin(), rndv.end());
+  stop_counters(time, resource);
+
+  set_new_limit(~size_t(0));
+  report_performance(__FILE__, "random", time, resource);
+}
+
+int main()
+{
+  using namespace __gnu_test;
+
+  // No memory constraint.
+  set_new_limit(~size_t(0));
+
+  std::vector<int> revv(max_size);
+  for (int i = 0; i < max_size; ++i)
+    revv[i] = -i;
+
+  std::vector<int> fwdv(max_size);
+  for (int i = 0; i < max_size; ++i)
+    fwdv[i] = i;
+
+  // a simple pseudo-random series which does not rely on rand() and friends
+  std::vector<int> rndv(max_size);
+  rndv[0] = 0;
   for (int i = 1; i < max_size; ++i)
-    v[i] = (v[i-1] + 110211473) * 745988807;
+    rndv[i] = (rndv[i-1] + 110211473) * 745988807;
+
+  time_counter time;
+  resource_counter resource;
 
   start_counters(time, resource);
-  std::stable_sort(v.begin(), v.end());
+  bench(~size_t(0), revv, fwdv, rndv);
   stop_counters(time, resource);
 
-  report_performance(__FILE__, "random", time, resource);
+  report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
+  clear_counters(time, resource);
+
+  start_counters(time, resource);
+  // Limit to fourth the expected size of the sorted array.
+  bench(max_size * sizeof(int) / 4, revv, fwdv, rndv);
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
+  clear_counters(time, resource);
+
+  start_counters(time, resource);
+  // Limit to 1/64 of range size.
+  bench(max_size * sizeof(int) / 64, revv, fwdv, rndv);
+  stop_counters(time, resource);
+
+  report_performance(__FILE__, "bench 1 /64 memory", time, resource);
+  clear_counters(time, resource);
+
+  revv.resize(small_size);
+  fwdv.resize(small_size);
+  rndv.resize(small_size);
+
+  start_counters(time, resource);
+  // Forbid any allocation.
+  bench(0, revv, fwdv, rndv);
+  stop_counters(time, resource);
 
+  report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
   return 0;
 }

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-11-19 12:26 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-11  6:32 [PATCH] PR 83938 Reduce memory consumption in stable_sort/inplace_merge François Dumont
2020-06-22  6:27 ` François Dumont
2020-06-24 17:39 ` Jonathan Wakely
2020-06-24 20:12   ` François Dumont
2020-11-18 20:43   ` François Dumont
2020-11-19 12:26 ` Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).