[binutils-gdb] Parallelize DWARF indexing

public inbox for gdb-cvs@sourceware.org
help / color / mirror / Atom feed

* [binutils-gdb] Parallelize DWARF indexing
@ 2022-04-12 15:41 Tom Tromey
  0 siblings, 0 replies; only message in thread
From: Tom Tromey @ 2022-04-12 15:41 UTC (permalink / raw)
  To: gdb-cvs

https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=46114cb7be362cae03025d65f1b3bcbb1c7e8df0

commit 46114cb7be362cae03025d65f1b3bcbb1c7e8df0
Author: Tom Tromey <tom@tromey.com>
Date:   Sun Apr 18 15:20:43 2021 -0600

    Parallelize DWARF indexing
    
    This parallelizes the new DWARF indexer.  The indexer's storage was
    designed so that each storage object and each indexer is fully
    independent.  This setup makes it simple to scan different CUs
    independently.
    
    This patch creates a new cooked index storage object per thread, and
    then scans a subset of all the CUs in each such thread, using gdb's
    existing thread pool.
    
    In the ongoing "gdb gdb" example, this patch reduces the wall time
    down to 0.668923, from 0.903534.  (Note that the 0.903534 is the time
    for the new index -- that is, when the "enable the new index" patch is
    rebased to before this one.  However, in the final series, that patch
    appears toward the end.  Hopefully this isn't too confusing.)

Diff:
---
 gdb/dwarf2/cooked-index.c | 142 ++++++++++++++++++++++++++++++++++------------
 gdb/dwarf2/cooked-index.h | 116 ++++++++++++++++++++++++++-----------
 gdb/dwarf2/read.c         | 115 +++++++++++++++++++++++++++++++------
 gdb/dwarf2/read.h         |   4 +-
 4 files changed, 289 insertions(+), 88 deletions(-)

diff --git a/gdb/dwarf2/cooked-index.c b/gdb/dwarf2/cooked-index.c
index 1b7e25d2350..e56a47520f8 100644
--- a/gdb/dwarf2/cooked-index.c
+++ b/gdb/dwarf2/cooked-index.c
@@ -113,10 +113,41 @@ cooked_index::add (sect_offset die_offset, enum dwarf_tag tag,
   return result;
 }
 
+cooked_index_vector::cooked_index_vector (vec_type &&vec)
+  : m_vector (std::move (vec))
+{
+  finalize ();
+}
+
+/* See cooked-index.h.  */
+
+dwarf2_per_cu_data *
+cooked_index_vector::lookup (CORE_ADDR addr)
+{
+  for (const auto &index : m_vector)
+    {
+      dwarf2_per_cu_data *result = index->lookup (addr);
+      if (result != nullptr)
+	return result;
+    }
+  return nullptr;
+}
+
+/* See cooked-index.h.  */
+
+std::vector<addrmap *>
+cooked_index_vector::get_addrmaps ()
+{
+  std::vector<addrmap *> result;
+  for (const auto &index : m_vector)
+    result.push_back (index->m_addrmap);
+  return result;
+}
+
 /* See cooked-index.h.  */
 
-cooked_index::range
-cooked_index::find (gdb::string_view name, bool completing)
+cooked_index_vector::range
+cooked_index_vector::find (gdb::string_view name, bool completing)
 {
   auto lower = std::lower_bound (m_entries.begin (), m_entries.end (),
 				 name,
@@ -146,8 +177,8 @@ cooked_index::find (gdb::string_view name, bool completing)
 /* See cooked-index.h.  */
 
 gdb::unique_xmalloc_ptr<char>
-cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
-					 htab_t gnat_entries)
+cooked_index_vector::handle_gnat_encoded_entry (cooked_index_entry *entry,
+						htab_t gnat_entries)
 {
   std::string canonical = ada_decode (entry->name, false, false);
   if (canonical.empty ())
@@ -170,9 +201,11 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
 	{
 	  gdb::unique_xmalloc_ptr<char> new_name
 	    = make_unique_xstrndup (name.data (), name.length ());
-	  last = create (entry->die_offset, DW_TAG_namespace,
-			 0, new_name.get (), parent,
-			 entry->per_cu);
+	  /* It doesn't matter which obstack we allocate this on, so
+	     we pick the most convenient one.  */
+	  last = m_vector[0]->create (entry->die_offset, DW_TAG_namespace,
+				      0, new_name.get (), parent,
+				      entry->per_cu);
 	  last->canonical = last->name;
 	  m_names.push_back (std::move (new_name));
 	  *slot = last;
@@ -187,8 +220,28 @@ cooked_index::handle_gnat_encoded_entry (cooked_index_entry *entry,
 
 /* See cooked-index.h.  */
 
+const cooked_index_entry *
+cooked_index_vector::get_main () const
+{
+  const cooked_index_entry *result = nullptr;
+
+  for (const auto &index : m_vector)
+    {
+      const cooked_index_entry *entry = index->get_main ();
+      if (result == nullptr
+	  || ((result->flags & IS_MAIN) == 0
+	      && entry != nullptr
+	      && (entry->flags & IS_MAIN) != 0))
+	result = entry;
+    }
+
+  return result;
+}
+
+/* See cooked-index.h.  */
+
 void
-cooked_index::finalize ()
+cooked_index_vector::finalize ()
 {
   auto hash_name_ptr = [] (const void *p)
     {
@@ -210,38 +263,26 @@ cooked_index::finalize ()
   htab_up seen_names (htab_create_alloc (10, hash_name_ptr, eq_name_ptr,
 					 nullptr, xcalloc, xfree));
 
-  htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
-					   nullptr, xcalloc, xfree));
-
-  for (cooked_index_entry *entry : m_entries)
+  for (auto &index : m_vector)
     {
-      gdb_assert (entry->canonical == nullptr);
-      if ((entry->per_cu->lang != language_cplus
-	   && entry->per_cu->lang != language_ada)
-	  || (entry->flags & IS_LINKAGE) != 0)
-	entry->canonical = entry->name;
-      else
+      htab_up gnat_entries (htab_create_alloc (10, hash_entry, eq_entry,
+					       nullptr, xcalloc, xfree));
+
+      std::vector<cooked_index_entry *> entries
+	= std::move (index->m_entries);
+      for (cooked_index_entry *entry : entries)
 	{
-	  if (entry->per_cu->lang == language_ada)
-	    {
-	      gdb::unique_xmalloc_ptr<char> canon_name
-		= handle_gnat_encoded_entry (entry, gnat_entries.get ());
-	      if (canon_name == nullptr)
-		entry->canonical = entry->name;
-	      else
-		{
-		  entry->canonical = canon_name.get ();
-		  m_names.push_back (std::move (canon_name));
-		}
-	    }
+	  gdb_assert (entry->canonical == nullptr);
+	  if ((entry->per_cu->lang != language_cplus
+	       && entry->per_cu->lang != language_ada)
+	      || (entry->flags & IS_LINKAGE) != 0)
+	    entry->canonical = entry->name;
 	  else
 	    {
-	      void **slot = htab_find_slot (seen_names.get (), entry,
-					    INSERT);
-	      if (*slot == nullptr)
+	      if (entry->per_cu->lang == language_ada)
 		{
 		  gdb::unique_xmalloc_ptr<char> canon_name
-		    = cp_canonicalize_string (entry->name);
+		    = handle_gnat_encoded_entry (entry, gnat_entries.get ());
 		  if (canon_name == nullptr)
 		    entry->canonical = entry->name;
 		  else
@@ -252,12 +293,39 @@ cooked_index::finalize ()
 		}
 	      else
 		{
-		  const cooked_index_entry *other
-		    = (const cooked_index_entry *) *slot;
-		  entry->canonical = other->canonical;
+		  void **slot = htab_find_slot (seen_names.get (), entry,
+						INSERT);
+		  if (*slot == nullptr)
+		    {
+		      gdb::unique_xmalloc_ptr<char> canon_name
+			= cp_canonicalize_string (entry->name);
+		      if (canon_name == nullptr)
+			entry->canonical = entry->name;
+		      else
+			{
+			  entry->canonical = canon_name.get ();
+			  m_names.push_back (std::move (canon_name));
+			}
+		    }
+		  else
+		    {
+		      const cooked_index_entry *other
+			= (const cooked_index_entry *) *slot;
+		      entry->canonical = other->canonical;
+		    }
 		}
 	    }
 	}
+
+      if (m_entries.empty ())
+	m_entries = std::move (entries);
+      else
+	{
+	  size_t old_size = m_entries.size ();
+	  m_entries.resize (m_entries.size () + entries.size ());
+	  memcpy (m_entries.data () + old_size,
+		  entries.data (), entries.size () * sizeof (entries[0]));
+	}
     }
 
   m_names.shrink_to_fit ();
diff --git a/gdb/dwarf2/cooked-index.h b/gdb/dwarf2/cooked-index.h
index 0a38fc88e52..188155190ec 100644
--- a/gdb/dwarf2/cooked-index.h
+++ b/gdb/dwarf2/cooked-index.h
@@ -30,6 +30,7 @@
 #include "gdbsupport/gdb_obstack.h"
 #include "addrmap.h"
 #include "gdbsupport/iterator-range.h"
+#include "gdbsupport/thread-pool.h"
 
 struct dwarf2_per_cu_data;
 
@@ -155,6 +156,8 @@ private:
   void write_scope (struct obstack *storage, const char *sep) const;
 };
 
+class cooked_index_vector;
+
 /* An index of interesting DIEs.  This is "cooked", in contrast to a
    mapped .debug_names or .gdb_index, which are "raw".  An entry in
    the index is of type cooked_index_entry.
@@ -178,13 +181,6 @@ public:
 				 const cooked_index_entry *parent_entry,
 				 dwarf2_per_cu_data *per_cu);
 
-  /* Return the entry that is believed to represent the program's
-     "main".  This will return NULL if no such entry is available.  */
-  const cooked_index_entry *get_main () const
-  {
-    return m_main;
-  }
-
   /* Install a new fixed addrmap from the given mutable addrmap.  */
   void install_addrmap (addrmap *map)
   {
@@ -192,6 +188,17 @@ public:
     m_addrmap = addrmap_create_fixed (map, &m_storage);
   }
 
+  friend class cooked_index_vector;
+
+private:
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const
+  {
+    return m_main;
+  }
+
   /* Look up ADDR in the address map, and return either the
      corresponding CU, or nullptr if the address could not be
      found.  */
@@ -200,10 +207,52 @@ public:
     return (dwarf2_per_cu_data *) addrmap_find (m_addrmap, addr);
   }
 
-  /* Finalize the index.  This should be called a single time, when
-     the index has been fully populated.  It enters all the entries
-     into the internal hash table.  */
-  void finalize ();
+  /* Create a new cooked_index_entry and register it with this object.
+     Entries are owned by this object.  The new item is returned.  */
+  cooked_index_entry *create (sect_offset die_offset,
+			      enum dwarf_tag tag,
+			      cooked_index_flag flags,
+			      const char *name,
+			      const cooked_index_entry *parent_entry,
+			      dwarf2_per_cu_data *per_cu)
+  {
+    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
+						name, parent_entry,
+						per_cu);
+  }
+
+  /* Storage for the entries.  */
+  auto_obstack m_storage;
+  /* List of all entries.  */
+  std::vector<cooked_index_entry *> m_entries;
+  /* If we found "main" or an entry with 'is_main' set, store it
+     here.  */
+  cooked_index_entry *m_main = nullptr;
+  /* When constructing the index, entries are stored on a linked list.
+     This member points to the head of that list.  Later, they are
+     entered into the hash table, at which point this is no longer
+     used.  */
+  cooked_index_entry *m_start = nullptr;
+  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
+     objects.  */
+  addrmap *m_addrmap = nullptr;
+};
+
+/* The main index of DIEs.  The parallel DIE indexers create
+   cooked_index objects.  Then, these are all handled to a
+   cooked_index_vector for storage and final indexing.  The index is
+   made by iterating over the entries previously created.  */
+
+class cooked_index_vector
+{
+public:
+
+  /* A convenience typedef for the vector that is contained in this
+     object.  */
+  typedef std::vector<std::unique_ptr<cooked_index>> vec_type;
+
+  explicit cooked_index_vector (vec_type &&vec);
+  DISABLE_COPY_AND_ASSIGN (cooked_index_vector);
 
   /* A simple range over part of m_entries.  */
   typedef iterator_range<std::vector<cooked_index_entry *>::iterator> range;
@@ -219,6 +268,19 @@ public:
     return { m_entries.begin (), m_entries.end () };
   }
 
+  /* Look up ADDR in the address map, and return either the
+     corresponding CU, or nullptr if the address could not be
+     found.  */
+  dwarf2_per_cu_data *lookup (CORE_ADDR addr);
+
+  /* Return a new vector of all the addrmaps used by all the indexes
+     held by this object.  */
+  std::vector<addrmap *> get_addrmaps ();
+
+  /* Return the entry that is believed to represent the program's
+     "main".  This will return NULL if no such entry is available.  */
+  const cooked_index_entry *get_main () const;
+
 private:
 
   /* GNAT only emits mangled ("encoded") names in the DWARF, and does
@@ -229,32 +291,20 @@ private:
   gdb::unique_xmalloc_ptr<char> handle_gnat_encoded_entry
        (cooked_index_entry *entry, htab_t gnat_entries);
 
-  /* Create a new cooked_index_entry and register it with this object.
-     Entries are owned by this object.  The new item is returned.  */
-  cooked_index_entry *create (sect_offset die_offset,
-			      enum dwarf_tag tag,
-			      cooked_index_flag flags,
-			      const char *name,
-			      const cooked_index_entry *parent_entry,
-			      dwarf2_per_cu_data *per_cu)
-  {
-    return new (&m_storage) cooked_index_entry (die_offset, tag, flags,
-						name, parent_entry,
-						per_cu);
-  }
+  /* Finalize the index.  This should be called a single time, when
+     the index has been fully populated.  It enters all the entries
+     into the internal hash table.  */
+  void finalize ();
 
-  /* Storage for the entries.  */
-  auto_obstack m_storage;
-  /* List of all entries.  */
+  /* The vector of cooked_index objects.  This is stored because the
+     entries are stored on the obstacks in those objects.  */
+  vec_type m_vector;
+
+  /* List of all entries.  This is sorted during finalization.  */
   std::vector<cooked_index_entry *> m_entries;
-  /* If we found "main" or an entry with 'is_main' set, store it
-     here.  */
-  cooked_index_entry *m_main = nullptr;
+
   /* Storage for canonical names.  */
   std::vector<gdb::unique_xmalloc_ptr<char>> m_names;
-  /* The addrmap.  This maps address ranges to dwarf2_per_cu_data
-     objects.  */
-  addrmap *m_addrmap = nullptr;
 };
 
 #endif /* GDB_DWARF2_COOKED_INDEX_H */
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 5998b7ffca7..fce465e4dab 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -92,6 +92,8 @@
 #include "dwarf2/abbrev-cache.h"
 #include "cooked-index.h"
 #include "split-name.h"
+#include "gdbsupport/parallel-for.h"
+#include "gdbsupport/thread-pool.h"
 
 /* When == 1, print basic high level tracing messages.
    When > 1, be more verbose.
@@ -6536,6 +6538,14 @@ lookup_dwo_id (struct dwarf2_cu *cu, struct die_info* comp_unit_die)
 static struct dwo_unit *
 lookup_dwo_unit (dwarf2_cu *cu, die_info *comp_unit_die, const char *dwo_name)
 {
+#if CXX_STD_THREAD
+  /* We need a lock here both to handle the DWO hash table, and BFD,
+     which is not thread-safe.  */
+  static std::mutex dwo_lock;
+
+  std::lock_guard<std::mutex> guard (dwo_lock);
+#endif
+
   dwarf2_per_cu_data *per_cu = cu->per_cu;
   struct dwo_unit *dwo_unit;
   const char *comp_dir;
@@ -6683,9 +6693,14 @@ cutu_reader::cutu_reader (dwarf2_per_cu_data *this_cu,
     }
   else
     {
-      /* If an existing_cu is provided, a dwarf2_cu must not exist for this_cu
-	 in per_objfile yet.  */
-      gdb_assert (per_objfile->get_cu (this_cu) == nullptr);
+      /* If an existing_cu is provided, a dwarf2_cu must not exist for
+	 this_cu in per_objfile yet.  Here, CACHE doubles as a flag to
+	 let us know that the CU is being scanned using the parallel
+	 indexer.  This assert is avoided in this case because (1) it
+	 is irrelevant, and (2) the get_cu method is not
+	 thread-safe.  */
+      gdb_assert (cache != nullptr
+		  || per_objfile->get_cu (this_cu) == nullptr);
       m_new_cu.reset (new dwarf2_cu (this_cu, per_objfile));
       cu = m_new_cu.get ();
     }
@@ -7499,9 +7514,9 @@ process_psymtab_comp_unit (dwarf2_per_cu_data *this_cu,
     {
       if (per_objfile->per_bfd->using_index)
 	{
-	  if (!this_cu->scanned)
+	  bool nope = false;
+	  if (this_cu->scanned.compare_exchange_strong (nope, true))
 	    {
-	      this_cu->scanned = true;
 	      prepare_one_comp_unit (reader.cu, reader.comp_unit_die,
 				     pretend_language);
 	      gdb_assert (storage != nullptr);
@@ -7869,6 +7884,7 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
     = per_bfd->using_index ? &index_storage : nullptr;
   create_all_comp_units (per_objfile);
   build_type_psymtabs (per_objfile, index_storage_ptr);
+  std::vector<std::unique_ptr<cooked_index>> indexes;
   if (per_bfd->using_index)
     {
       per_bfd->quick_file_names_table
@@ -7877,15 +7893,70 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
       if (!per_bfd->debug_aranges.empty ())
 	read_addrmap_from_aranges (per_objfile, &per_bfd->debug_aranges,
 				   index_storage.get_addrmap ());
-    }
 
-  for (const auto &per_cu : per_bfd->all_comp_units)
+      {
+	/* Ensure that complaints are handled correctly.  */
+	complaint_interceptor complaint_handler;
+
+	using iter_type = typeof (per_bfd->all_comp_units.begin ());
+
+	/* Each thread returns a pair holding a cooked index, and a vector
+	   of errors that should be printed.  The latter is done because
+	   GDB's I/O system is not thread-safe.  run_on_main_thread could be
+	   used, but that would mean the messages are printed after the
+	   prompt, which looks weird.  */
+	using result_type = std::pair<std::unique_ptr<cooked_index>,
+				      std::vector<gdb_exception>>;
+	std::vector<result_type> results
+	  = gdb::parallel_for_each (1, per_bfd->all_comp_units.begin (),
+				    per_bfd->all_comp_units.end (),
+				    [=] (iter_type iter, iter_type end)
+				    {
+				      std::vector<gdb_exception> errors;
+				      cooked_index_storage thread_storage;
+				      for (; iter != end; ++iter)
+					{
+					  dwarf2_per_cu_data *per_cu = iter->get ();
+					  try
+					    {
+					      process_psymtab_comp_unit (per_cu, per_objfile,
+									 false,
+									 language_minimal,
+									 &thread_storage);
+					    }
+					  catch (gdb_exception &except)
+					    {
+					      errors.push_back (std::move (except));
+					    }
+					}
+				      return result_type (thread_storage.release (),
+							  std::move (errors));
+				    });
+
+	/* Only show a given exception a single time.  */
+	std::unordered_set<gdb_exception> seen_exceptions;
+	for (auto &one_result : results)
+	  {
+	    indexes.push_back (std::move (one_result.first));
+	    for (auto &one_exc : one_result.second)
+	      {
+		if (seen_exceptions.insert (one_exc).second)
+		  exception_print (gdb_stderr, one_exc);
+	      }
+	  }
+      }
+    }
+  else
     {
-      if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
-	/* In case a forward DW_TAG_imported_unit has read the CU already.  */
-	continue;
-      process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
-				 language_minimal, index_storage_ptr);
+      for (const auto &per_cu : per_bfd->all_comp_units)
+	{
+	  if (!per_bfd->using_index && per_cu->v.psymtab != NULL)
+	    /* In case a forward DW_TAG_imported_unit has read the CU
+	       already.  */
+	    continue;
+	  process_psymtab_comp_unit (per_cu.get (), per_objfile, false,
+				     language_minimal, nullptr);
+	}
     }
 
   /* This has to wait until we read the CUs, we need the list of DWOs.  */
@@ -7903,8 +7974,20 @@ dwarf2_build_psymtabs_hard (dwarf2_per_objfile *per_objfile)
 
   if (per_bfd->using_index)
     {
-      per_bfd->cooked_index_table = index_storage.release ();
-      per_bfd->cooked_index_table->finalize ();
+      indexes.push_back (index_storage.release ());
+      /* Remove any NULL entries.  This might happen if parallel-for
+	 decides to throttle the number of threads that were used.  */
+      indexes.erase
+	(std::remove_if (indexes.begin (),
+			 indexes.end (),
+			 [] (const std::unique_ptr<cooked_index> &entry)
+			 {
+			   return entry == nullptr;
+			 }),
+	 indexes.end ());
+      indexes.shrink_to_fit ();
+      per_bfd->cooked_index_table.reset
+	(new cooked_index_vector (std::move (indexes)));
 
       const cooked_index_entry *main_entry
 	= per_bfd->cooked_index_table->get_main ();
@@ -19463,9 +19546,9 @@ cooked_indexer::ensure_cu_exists (cutu_reader *reader,
      Doing this check here avoids self-imports as well.  */
   if (for_scanning)
     {
-      if (per_cu->scanned)
+      bool nope = false;
+      if (!per_cu->scanned.compare_exchange_strong (nope, true))
 	return nullptr;
-      per_cu->scanned = true;
     }
   if (per_cu == m_per_cu)
     return reader;
diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h
index 4ee7de98c50..4d39c465420 100644
--- a/gdb/dwarf2/read.h
+++ b/gdb/dwarf2/read.h
@@ -169,7 +169,7 @@ struct dwarf2_per_cu_data
 
   /* True if this CU has been scanned by the indexer; false if
      not.  */
-  bool scanned : 1;
+  std::atomic<bool> scanned;
 
   /* Our index in the unshared "symtabs" vector.  */
   unsigned index = 0;
@@ -457,7 +457,7 @@ public:
   std::unique_ptr<mapped_debug_names> debug_names_table;
 
   /* The cooked index, or NULL if not using one.  */
-  std::unique_ptr<cooked_index> cooked_index_table;
+  std::unique_ptr<cooked_index_vector> cooked_index_table;
 
   /* When using index_table, this keeps track of all quick_file_names entries.
      TUs typically share line table entries with a CU, so we maintain a


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-04-12 15:41 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-12 15:41 [binutils-gdb] Parallelize DWARF indexing Tom Tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).