public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/4] [COVER-LETTER] Use task size balancing in cooked index creation
@ 2022-07-22 17:03 Tom de Vries
  2022-07-22 17:03 ` [PATCH 2/4] [gdbsupport] Add task size parameter in parallel_for_each Tom de Vries
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Tom de Vries @ 2022-07-22 17:03 UTC (permalink / raw)
  To: gdb-patches

Reworking of "[gdbsupport] Use task size in parallel_for_each", submitted
earlier here (
https://sourceware.org/pipermail/gdb-patches/2022-July/190868.html ).

Note that this series assumes "Introduce gdb::make_function_view" as posted
here ( https://sourceware.org/pipermail/gdb-patches/2022-July/190992.html ).
---
 gdb/COVER-LETTER | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 gdb/COVER-LETTER

diff --git a/gdb/COVER-LETTER b/gdb/COVER-LETTER
new file mode 100644
index 00000000000..e69de29bb2d

base-commit: 0d119fdec7883d2e8ee59ee1369c96788f9b18a5
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/4] [gdbsupport] Add task size parameter in parallel_for_each
  2022-07-22 17:03 [PATCH 1/4] [COVER-LETTER] Use task size balancing in cooked index creation Tom de Vries
@ 2022-07-22 17:03 ` Tom de Vries
  2022-07-22 17:03 ` [PATCH 3/4] [gdb/symtab] Use task size in parallel_for_each in dwarf2_build_psymtabs_hard Tom de Vries
  2022-07-22 17:03 ` [PATCH 4/4] [gdb] Add unit test for gdb::sequential_for_each Tom de Vries
  2 siblings, 0 replies; 4+ messages in thread
From: Tom de Vries @ 2022-07-22 17:03 UTC (permalink / raw)
  To: gdb-patches

Add a task_size parameter to parallel_for_each, defaulting to nullptr, and use
the task size to distribute similarly-sized chunks to the threads.

Tested on x86_64-linux.
---
 gdb/unittests/parallel-for-selftests.c |  28 ++++++
 gdbsupport/parallel-for.h              | 113 ++++++++++++++++++++-----
 2 files changed, 119 insertions(+), 22 deletions(-)

diff --git a/gdb/unittests/parallel-for-selftests.c b/gdb/unittests/parallel-for-selftests.c
index 8a86b435fd3..6e341f64037 100644
--- a/gdb/unittests/parallel-for-selftests.c
+++ b/gdb/unittests/parallel-for-selftests.c
@@ -68,6 +68,34 @@ test (int n_threads)
 			  });
   SELF_CHECK (counter == 0);
 
+  auto task_size_max_ = [] (int iter)
+    {
+      return (size_t)SIZE_MAX;
+    };
+  auto task_size_max = gdb::make_function_view (task_size_max_);
+
+  counter = 0;
+  gdb::parallel_for_each (1, 0, NUMBER,
+			  [&] (int start, int end)
+			  {
+			    counter += end - start;
+			  }, task_size_max);
+  SELF_CHECK (counter == NUMBER);
+
+  auto task_size_one_ = [] (int iter)
+    {
+      return (size_t)1;
+    };
+  auto task_size_one = gdb::make_function_view (task_size_one_);
+
+  counter = 0;
+  gdb::parallel_for_each (1, 0, NUMBER,
+			  [&] (int start, int end)
+			  {
+			    counter += end - start;
+			  }, task_size_one);
+  SELF_CHECK (counter == NUMBER);
+
 #undef NUMBER
 }
 
diff --git a/gdbsupport/parallel-for.h b/gdbsupport/parallel-for.h
index 0037ee23ff3..4cd1dbf847e 100644
--- a/gdbsupport/parallel-for.h
+++ b/gdbsupport/parallel-for.h
@@ -23,6 +23,7 @@
 #include <algorithm>
 #include <type_traits>
 #include "gdbsupport/thread-pool.h"
+#include "gdbsupport/function-view.h"
 
 namespace gdb
 {
@@ -134,7 +135,8 @@ typename gdb::detail::par_for_accumulator<
     typename std::result_of<RangeFunction (RandomIt, RandomIt)>::type
   >::result_type
 parallel_for_each (unsigned n, RandomIt first, RandomIt last,
-		   RangeFunction callback)
+		   RangeFunction callback,
+		   gdb::function_view<size_t(RandomIt)> task_size = nullptr)
 {
   using result_type
     = typename std::result_of<RangeFunction (RandomIt, RandomIt)>::type;
@@ -148,17 +150,41 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last,
   size_t n_elements = last - first;
   size_t elts_per_thread = 0;
   size_t elts_left_over = 0;
+  size_t total_size = 0;
+  size_t size_per_thread = 0;
+  size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements;
 
   if (n_threads > 1)
     {
-      /* Require that there should be at least N elements in a
-	 thread.  */
-      gdb_assert (n > 0);
-      if (n_elements / n_threads < n)
-	n_threads = std::max (n_elements / n, (size_t) 1);
-      elts_per_thread = n_elements / n_threads;
-      elts_left_over = n_elements % n_threads;
-      /* n_elements == n_threads * elts_per_thread + elts_left_over. */
+      if (task_size != nullptr)
+	{
+	  gdb_assert (n == 1);
+	  for (RandomIt i = first; i != last; ++i)
+	    {
+	      size_t element_size = task_size (i);
+	      gdb_assert (element_size > 0);
+	      if (element_size > max_element_size)
+		/* We could start scaling here, but that doesn't seem to be
+		   worth the effort.  */
+		element_size = max_element_size;
+	      size_t prev_total_size = total_size;
+	      total_size += element_size;
+	      /* Check for overflow.  */
+	      gdb_assert (prev_total_size < total_size);
+	    }
+	  size_per_thread = total_size / n_threads;
+	}
+      else
+	{
+	  /* Require that there should be at least N elements in a
+	     thread.  */
+	  gdb_assert (n > 0);
+	  if (n_elements / n_threads < n)
+	    n_threads = std::max (n_elements / n, (size_t) 1);
+	  elts_per_thread = n_elements / n_threads;
+	  elts_left_over = n_elements % n_threads;
+	  /* n_elements == n_threads * elts_per_thread + elts_left_over. */
+	}
     }
 
   size_t count = n_threads == 0 ? 0 : n_threads - 1;
@@ -167,20 +193,52 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last,
   if (parallel_for_each_debug)
     {
       debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
-      debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
-      debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
+      if (task_size != nullptr)
+	{
+	  debug_printf (_("Parallel for: total_size: %zu\n"), total_size);
+	  debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread);
+	}
+      else
+	{
+	  debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
+	  debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
+	}
     }
 
+  size_t remaining_size = total_size;
   for (int i = 0; i < count; ++i)
     {
-      RandomIt end = first + elts_per_thread;
-      if (i < elts_left_over)
-	/* Distribute the leftovers over the worker threads, to avoid having
-	   to handle all of them in a single thread.  */
-	end++;
+      RandomIt end;
+      size_t chunk_size = 0;
+      if (task_size == nullptr)
+	{
+	  end = first + elts_per_thread;
+	  if (i < elts_left_over)
+	    /* Distribute the leftovers over the worker threads, to avoid having
+	       to handle all of them in a single thread.  */
+	    end++;
+	}
+      else
+	{
+	  RandomIt j;
+	  for (j = first; j < last && chunk_size < size_per_thread; ++j)
+	    {
+	      size_t element_size = task_size (j);
+	      if (element_size > max_element_size)
+		element_size = max_element_size;
+	      chunk_size += element_size;
+	    }
+	  end = j;
+	  remaining_size -= chunk_size;
+	}
       if (parallel_for_each_debug)
-	debug_printf (_("Parallel for: elements on worker thread %i\t: %zu\n"),
-		      i, (size_t)(end - first));
+	{
+	  debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
+			i, (size_t)(end - first));
+	  if (task_size != nullptr)
+	    debug_printf (_("\t(size: %zu)"), chunk_size);
+	  debug_printf (_("\n"));
+	}
       results.post (i, [=] ()
         {
 	  return callback (first, end);
@@ -190,12 +248,22 @@ parallel_for_each (unsigned n, RandomIt first, RandomIt last,
 
   for (int i = count; i < n_worker_threads; ++i)
     if (parallel_for_each_debug)
-      debug_printf (_("Parallel for: elements on worker thread %i\t: 0\n"), i);
+      {
+	debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
+	if (task_size != nullptr)
+	  debug_printf (_("\t(size: 0)"));
+	debug_printf (_("\n"));
+      }
 
   /* Process all the remaining elements in the main thread.  */
   if (parallel_for_each_debug)
-    debug_printf (_("Parallel for: elements on main thread\t\t: %zu\n"),
-		  (size_t)(last - first));
+    {
+      debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
+		    (size_t)(last - first));
+      if (task_size != nullptr)
+	debug_printf (_("\t(size: %zu)"), remaining_size);
+      debug_printf (_("\n"));
+    }
   return results.finish ([=] ()
     {
       return callback (first, last);
@@ -211,7 +279,8 @@ typename gdb::detail::par_for_accumulator<
     typename std::result_of<RangeFunction (RandomIt, RandomIt)>::type
   >::result_type
 sequential_for_each (unsigned n, RandomIt first, RandomIt last,
-		   RangeFunction callback)
+		     RangeFunction callback,
+		     gdb::function_view<size_t(RandomIt)> task_size = nullptr)
 {
   using result_type
     = typename std::result_of<RangeFunction (RandomIt, RandomIt)>::type;
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 3/4] [gdb/symtab] Use task size in parallel_for_each in dwarf2_build_psymtabs_hard
  2022-07-22 17:03 [PATCH 1/4] [COVER-LETTER] Use task size balancing in cooked index creation Tom de Vries
  2022-07-22 17:03 ` [PATCH 2/4] [gdbsupport] Add task size parameter in parallel_for_each Tom de Vries
@ 2022-07-22 17:03 ` Tom de Vries
  2022-07-22 17:03 ` [PATCH 4/4] [gdb] Add unit test for gdb::sequential_for_each Tom de Vries
  2 siblings, 0 replies; 4+ messages in thread
From: Tom de Vries @ 2022-07-22 17:03 UTC (permalink / raw)
  To: gdb-patches

In dwarf2_build_psymtabs_hard, we use a parallel_for_each to distribute CUs
over threads.

Ensuring a fair distribution over the worker threads and main thread in terms
of number of CUs might not be the most efficient way, given that CUs can vary
in size.

Fix this by using per_cu->get_length () as the task size.

I've used this experiment to verify the performance impact:
...
$ for n in $(seq 1 10); do \
    time gdb -q -batch ~/firefox/libxul.so-93.0-1.1.x86_64.debug \
    2>&1 \
    | grep "real:"; \
  done
...
and without the patch got:
...
real: 4.71
real: 4.88
real: 4.29
real: 4.30
real: 4.65
real: 4.27
real: 4.27
real: 4.27
real: 4.75
real: 4.41
...
and with the patch:
...
real: 3.68
real: 3.81
real: 3.80
real: 3.68
real: 3.75
real: 3.69
real: 3.69
real: 3.74
real: 3.67
real: 3.74
...
so that seems a reasonable improvement.

With parallel_for_each_debug set to true, we get some more detail about
the difference in behaviour.  Without the patch we have:
...
Parallel for: n_elements: 2818
Parallel for: minimum elements per thread: 1
Parallel for: elts_per_thread: 704
Parallel for: elements on worker thread 0       : 705
Parallel for: elements on worker thread 1       : 705
Parallel for: elements on worker thread 2       : 704
Parallel for: elements on worker thread 3       : 0
Parallel for: elements on main thread           : 704
...
and with the patch:
...
Parallel for: n_elements: 2818
Parallel for: total_size: 1483674865
Parallel for: size_per_thread: 370918716
Parallel for: elements on worker thread 0       : 752   (size: 371811790)
Parallel for: elements on worker thread 1       : 360   (size: 371509370)
Parallel for: elements on worker thread 2       : 1130  (size: 372681710)
Parallel for: elements on worker thread 3       : 0     (size: 0)
Parallel for: elements on main thread           : 576   (size: 367671995)
...

Tested on x86_64-linux.
---
 gdb/dwarf2/read.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 42230607fe0..6cf61214b40 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -7067,6 +7067,13 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
 
     using iter_type = decltype (per_bfd->all_comp_units.begin ());
 
+    auto task_size_ = [] (iter_type iter)
+      {
+	dwarf2_per_cu_data *per_cu = iter->get ();
+	return (size_t)per_cu->length ();
+      };
+    auto task_size = gdb::make_function_view (task_size_);
+
     /* Each thread returns a pair holding a cooked index, and a vector
        of errors that should be printed.  The latter is done because
        GDB's I/O system is not thread-safe.  run_on_main_thread could be
@@ -7095,7 +7102,7 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
 	      }
 	  }
 	return result_type (thread_storage.release (), std::move (errors));
-      });
+      }, task_size);
 
     /* Only show a given exception a single time.  */
     std::unordered_set<gdb_exception> seen_exceptions;
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 4/4] [gdb] Add unit test for gdb::sequential_for_each
  2022-07-22 17:03 [PATCH 1/4] [COVER-LETTER] Use task size balancing in cooked index creation Tom de Vries
  2022-07-22 17:03 ` [PATCH 2/4] [gdbsupport] Add task size parameter in parallel_for_each Tom de Vries
  2022-07-22 17:03 ` [PATCH 3/4] [gdb/symtab] Use task size in parallel_for_each in dwarf2_build_psymtabs_hard Tom de Vries
@ 2022-07-22 17:03 ` Tom de Vries
  2 siblings, 0 replies; 4+ messages in thread
From: Tom de Vries @ 2022-07-22 17:03 UTC (permalink / raw)
  To: gdb-patches

With commit 18a5766d09c ("[gdbsupport] Add sequential_for_each") I added a
drop-in replacement for gdb::parallel_for_each, but there's nothing making
sure that the two remain in sync.

Extend the unit test for gdb::parallel_for_each to test both.

Do this using a slightly unusual file-self-inclusion.  Doing so keeps things
readable and maintainable, and avoids macrofying functions.

Tested on x86_64-linux.
---
 gdb/unittests/parallel-for-selftests.c | 117 ++++++++++++++++---------
 1 file changed, 74 insertions(+), 43 deletions(-)

diff --git a/gdb/unittests/parallel-for-selftests.c b/gdb/unittests/parallel-for-selftests.c
index 6e341f64037..75c1deb17dc 100644
--- a/gdb/unittests/parallel-for-selftests.c
+++ b/gdb/unittests/parallel-for-selftests.c
@@ -17,6 +17,13 @@
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
+/* This file is divided in two parts:
+   - FOR_EACH-undefined, and
+   - FOR_EACH-defined.
+   The former includes the latter, more than once, with different values for
+   FOR_EACH.  The FOR_EACH-defined part reads like a regular function.  */
+#ifndef FOR_EACH
+
 #include "defs.h"
 #include "gdbsupport/selftest.h"
 #include "gdbsupport/parallel-for.h"
@@ -43,8 +50,54 @@ struct save_restore_n_threads
   int n_threads;
 };
 
+/* Define test_par using TEST in the FOR_EACH-defined part.  */
+#define TEST test_par
+#define FOR_EACH gdb::parallel_for_each
+#include "parallel-for-selftests.c"
+#undef FOR_EACH
+#undef TEST
+
+/* Define test_seq using TEST in the FOR_EACH-defined part.  */
+#define TEST test_seq
+#define FOR_EACH gdb::sequential_for_each
+#include "parallel-for-selftests.c"
+#undef FOR_EACH
+#undef TEST
+
 static void
 test (int n_threads)
+{
+  test_par (n_threads);
+  test_seq (n_threads);
+}
+
+static void
+test_n_threads ()
+{
+  test (0);
+  test (1);
+  test (3);
+}
+
+}
+}
+
+#endif /* CXX_STD_THREAD */
+
+void _initialize_parallel_for_selftests ();
+void
+_initialize_parallel_for_selftests ()
+{
+#ifdef CXX_STD_THREAD
+  selftests::register_test ("parallel_for",
+			    selftests::parallel_for::test_n_threads);
+#endif /* CXX_STD_THREAD */
+}
+
+#else /* FOR_EACH */
+
+static void
+TEST (int n_threads)
 {
   save_restore_n_threads saver;
   gdb::thread_pool::g_thread_pool->set_thread_count (n_threads);
@@ -52,20 +105,19 @@ test (int n_threads)
 #define NUMBER 10000
 
   std::atomic<int> counter (0);
-  gdb::parallel_for_each (1, 0, NUMBER,
-			  [&] (int start, int end)
-			  {
-			    counter += end - start;
-			  });
-
+  FOR_EACH (1, 0, NUMBER,
+	    [&] (int start, int end)
+	    {
+	      counter += end - start;
+	    });
   SELF_CHECK (counter == NUMBER);
 
   counter = 0;
-  gdb::parallel_for_each (1, 0, 0,
-			  [&] (int start, int end)
-			  {
-			    counter += end - start;
-			  });
+  FOR_EACH (1, 0, 0,
+	    [&] (int start, int end)
+	    {
+	      counter += end - start;
+	    });
   SELF_CHECK (counter == 0);
 
   auto task_size_max_ = [] (int iter)
@@ -75,11 +127,11 @@ test (int n_threads)
   auto task_size_max = gdb::make_function_view (task_size_max_);
 
   counter = 0;
-  gdb::parallel_for_each (1, 0, NUMBER,
-			  [&] (int start, int end)
-			  {
-			    counter += end - start;
-			  }, task_size_max);
+  FOR_EACH (1, 0, NUMBER,
+	    [&] (int start, int end)
+	    {
+	      counter += end - start;
+	    }, task_size_max);
   SELF_CHECK (counter == NUMBER);
 
   auto task_size_one_ = [] (int iter)
@@ -89,35 +141,14 @@ test (int n_threads)
   auto task_size_one = gdb::make_function_view (task_size_one_);
 
   counter = 0;
-  gdb::parallel_for_each (1, 0, NUMBER,
-			  [&] (int start, int end)
-			  {
-			    counter += end - start;
-			  }, task_size_one);
+  FOR_EACH (1, 0, NUMBER,
+	    [&] (int start, int end)
+	    {
+	      counter += end - start;
+	    }, task_size_one);
   SELF_CHECK (counter == NUMBER);
 
 #undef NUMBER
 }
 
-static void
-test_n_threads ()
-{
-  test (0);
-  test (1);
-  test (3);
-}
-
-}
-}
-
-#endif /* CXX_STD_THREAD */
-
-void _initialize_parallel_for_selftests ();
-void
-_initialize_parallel_for_selftests ()
-{
-#ifdef CXX_STD_THREAD
-  selftests::register_test ("parallel_for",
-			    selftests::parallel_for::test_n_threads);
-#endif /* CXX_STD_THREAD */
-}
+#endif /* FOR_EACH */
-- 
2.35.3


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-07-22 17:03 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-22 17:03 [PATCH 1/4] [COVER-LETTER] Use task size balancing in cooked index creation Tom de Vries
2022-07-22 17:03 ` [PATCH 2/4] [gdbsupport] Add task size parameter in parallel_for_each Tom de Vries
2022-07-22 17:03 ` [PATCH 3/4] [gdb/symtab] Use task size in parallel_for_each in dwarf2_build_psymtabs_hard Tom de Vries
2022-07-22 17:03 ` [PATCH 4/4] [gdb] Add unit test for gdb::sequential_for_each Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).