public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH RFC] introduce dl_iterate_phdr_parallel
@ 2016-07-25 14:44 Gleb Natapov
  2016-07-28 21:57 ` Adhemerval Zanella
  2016-08-01 20:26 ` Carlos O'Donell
  0 siblings, 2 replies; 29+ messages in thread
From: Gleb Natapov @ 2016-07-25 14:44 UTC (permalink / raw)
  To: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 9522 bytes --]

Problem: exception handling is not scalable. Attached program mt.cc
demonstrates this easily. It runs around 1 seconds with 1 thread, but
with only 4 threads it takes 4.5 seconds to complete. The reason is
locks that are taken on unwind path.

There are two locks right now.

Fist is in libgcc's _Unwind_Find_registered_FDE and it is eliminated by
https://gcc.gnu.org/ml/gcc-patches/2016-07/msg01629.html (for dynamic
executables only, but this is common case).

Second one is dl_load_write_lock in __dl_iterate_phdr. It serves dual
purpose: it stops the list of loaded objects from been modified while
iterating over it and it makes sure that more than one callback will
not run in parallel. This means that even if we will find a cleaver way
to make __dl_iterate_phdr iterate over the objects list locklessly the
lock will still have to be taken around the call to callback to preserve
second guaranty.

This patch here propose to introduce another API: dl_iterate_phdr_parallel
which does exactly same thing as dl_iterate_phdr but may run more than
one provided callback in parallel. And to make it more scalable it
breaks single dl_load_write_lock into arrays of locks. Reader takes only
one lock, but writer has to lock all of them to proceed with modifying
the list.

Attached gcc.patch makes gcc use proposed dl_iterate_phdr_parallel
interface and with this modification (and the patch linked above) same
mt.cc with 4 threads runs around 1 second.

diff --git a/elf/Versions b/elf/Versions
index 23deda9..56b7510 100644
--- a/elf/Versions
+++ b/elf/Versions
@@ -11,6 +11,9 @@ libc {
   GLIBC_2.2.4 {
     dl_iterate_phdr;
   }
+  GLIBC_2.23 {
+    dl_iterate_phdr_parallel;
+  }
 %ifdef EXPORT_UNWIND_FIND_FDE
   # Needed for SHLIB_COMPAT calls using this version.
   GLIBC_2.2.5 {
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 687d7de..9b8e9ac 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -535,7 +535,8 @@ _dl_close_worker (struct link_map *map, bool force)
   tls_free_start = tls_free_end = NO_TLS_OFFSET;
 
   /* We modify the list of loaded objects.  */
-  __rtld_lock_lock_recursive (GL(dl_load_write_lock));
+  for (int i = 0; i < _DL_LOCKS; i++)
+    __rtld_lock_lock_recursive (GL(dl_load_write_lock)[i]);
 
   /* Check each element of the search list to see if all references to
      it are gone.  */
@@ -748,7 +749,8 @@ _dl_close_worker (struct link_map *map, bool force)
 	}
     }
 
-  __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
+  for (int i = 0; i < _DL_LOCKS; i++)
+    __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[i]);
 
   /* If we removed any object which uses TLS bump the generation counter.  */
   if (any_tls)
diff --git a/elf/dl-iteratephdr.c b/elf/dl-iteratephdr.c
index 4db6938..69e4a79 100644
--- a/elf/dl-iteratephdr.c
+++ b/elf/dl-iteratephdr.c
@@ -22,24 +22,47 @@
 #include <stddef.h>
 #include <libc-lock.h>
 
+static
+uintptr_t crank(uintptr_t word)
+{ 
+  word ^= (word << 13); word ^= (word >> 17); word ^= (word << 5); 
+  word = 69069*word + 12345;
+  return word;
+}
+
+static
+uintptr_t hash(uintptr_t val)
+{
+  char* x = (char*)&val;
+  uintptr_t r = 0;
+  r = x[0];
+  for (unsigned i = 1; i < sizeof(val); ++i)
+    {
+      r = crank(r) + x[i];
+      r = crank(r);
+    }
+  return r & (_DL_LOCKS - 1);
+}
+
+
 static void
-cancel_handler (void *arg __attribute__((unused)))
+cancel_handler (void *arg)
 {
-  __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
+  __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[(uintptr_t)arg]);
 }
 
-hidden_proto (__dl_iterate_phdr)
+static
 int
-__dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info,
-				    size_t size, void *data), void *data)
+__dl_iterate_phdr_internal (int (*callback) (struct dl_phdr_info *info,
+				    size_t size, void *data), void *data, unsigned idx)
 {
   struct link_map *l;
   struct dl_phdr_info info;
   int ret = 0;
 
   /* Make sure nobody modifies the list of loaded objects.  */
-  __rtld_lock_lock_recursive (GL(dl_load_write_lock));
-  __libc_cleanup_push (cancel_handler, NULL);
+  __rtld_lock_lock_recursive (GL(dl_load_write_lock)[idx]);
+  __libc_cleanup_push (cancel_handler, (void*)(uintptr_t)idx);
 
   /* We have to determine the namespace of the caller since this determines
      which namespace is reported.  */
@@ -80,10 +103,26 @@ __dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info,
 
   /* Release the lock.  */
   __libc_cleanup_pop (0);
-  __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
+  __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[idx]);
 
   return ret;
 }
+
+hidden_proto (__dl_iterate_phdr)
+int
+__dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info,
+				    size_t size, void *data), void *data) {
+  return __dl_iterate_phdr_internal(callback, data, 0);
+}
 hidden_def (__dl_iterate_phdr)
 
+hidden_proto (__dl_iterate_phdr_parallel)
+int
+__dl_iterate_phdr_parallel (int (*callback) (struct dl_phdr_info *info,
+				    size_t size, void *data), void *data) {
+  return __dl_iterate_phdr_internal(callback, data, hash((unsigned long)THREAD_SELF));
+}
+hidden_def (__dl_iterate_phdr_parallel)
+
 weak_alias (__dl_iterate_phdr, dl_iterate_phdr);
+weak_alias (__dl_iterate_phdr_parallel, dl_iterate_phdr_parallel);
diff --git a/elf/dl-object.c b/elf/dl-object.c
index 362992b..a8df503 100644
--- a/elf/dl-object.c
+++ b/elf/dl-object.c
@@ -31,7 +31,8 @@ internal_function
 _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
 {
   /* We modify the list of loaded objects.  */
-  __rtld_lock_lock_recursive (GL(dl_load_write_lock));
+  for (int i = 0; i < _DL_LOCKS; i++)
+    __rtld_lock_lock_recursive (GL(dl_load_write_lock)[i]);
 
   if (GL(dl_ns)[nsid]._ns_loaded != NULL)
     {
@@ -48,7 +49,8 @@ _dl_add_to_namespace_list (struct link_map *new, Lmid_t nsid)
   new->l_serial = GL(dl_load_adds);
   ++GL(dl_load_adds);
 
-  __rtld_lock_unlock_recursive (GL(dl_load_write_lock));
+  for (int i = 0; i < _DL_LOCKS; i++)
+    __rtld_lock_unlock_recursive (GL(dl_load_write_lock)[i]);
 }
 
 
diff --git a/elf/dl-support.c b/elf/dl-support.c
index c30194c..0fc12e4 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -213,7 +213,7 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock)
 /* This lock is used to keep __dl_iterate_phdr from inspecting the
    list of loaded objects while an object is added to or removed from
    that list.  */
-__rtld_lock_define_initialized_recursive (, _dl_load_write_lock)
+__rtld_lock_recursive_t _dl_load_write_lock[_DL_LOCKS] = { _RTLD_LOCK_RECURSIVE_INITIALIZER, };
 
 
 #ifdef HAVE_AUX_VECTOR
diff --git a/elf/link.h b/elf/link.h
index f448141..56fd4b5 100644
--- a/elf/link.h
+++ b/elf/link.h
@@ -168,6 +168,9 @@ extern int dl_iterate_phdr (int (*__callback) (struct dl_phdr_info *,
 					       size_t, void *),
 			    void *__data);
 
+extern int dl_iterate_phdr_parallel (int (*__callback) (struct dl_phdr_info *,
+					       size_t, void *),
+			    void *__data);
 
 /* Prototypes for the ld.so auditing interfaces.  These are not
    defined anywhere in ld.so but instead have to be provided by the
diff --git a/elf/rtld.c b/elf/rtld.c
index 647661c..9c65b03 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -128,7 +128,7 @@ struct rtld_global _rtld_global =
     ._dl_stack_flags = DEFAULT_STACK_PERMS,
 #ifdef _LIBC_REENTRANT
     ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
-    ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
+    ._dl_load_write_lock = { _RTLD_LOCK_RECURSIVE_INITIALIZER, },
 #endif
     ._dl_nns = 1,
     ._dl_ns =
diff --git a/include/link.h b/include/link.h
index 32a7392..9582ed9 100644
--- a/include/link.h
+++ b/include/link.h
@@ -336,6 +336,10 @@ extern int __dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info,
 					       size_t size, void *data),
 			      void *data);
 
+extern int __dl_iterate_phdr_parallel (int (*callback) (struct dl_phdr_info *info,
+					       size_t size, void *data),
+			      void *data);
+
 /* We use this macro to refer to ELF macros independent of the native
    wordsize.  `ELFW(R_TYPE)' is used in place of `ELF32_R_TYPE' or
    `ELF64_R_TYPE'.  */
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index f68fdf4..0d2b2a1 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -268,6 +268,8 @@ typedef void (*receiver_fct) (int, const char *, const char *);
    user interface to run-time dynamic linking.  */
 
 
+#define _DL_LOCKS 64 
+
 #ifndef SHARED
 # define EXTERN extern
 # define GL(name) _##name
@@ -334,7 +336,7 @@ struct rtld_global
   /* This lock is used to keep __dl_iterate_phdr from inspecting the
      list of loaded objects while an object is added to or removed
      from that list.  */
-  __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock)
+  __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock[_DL_LOCKS])
 
   /* Incremented whenever something may have been added to dl_loaded.  */
   EXTERN unsigned long long _dl_load_adds;
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
index c1054ce..ea9d2e9 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
@@ -82,6 +82,7 @@ GLIBC_2.17 clock_settime F
 GLIBC_2.17 secure_getenv F
 GLIBC_2.18 GLIBC_2.18 A
 GLIBC_2.18 __cxa_thread_atexit_impl F
+GLIBC_2.23 dl_iterate_phdr_parallel F
 GLIBC_2.2.5 GLIBC_2.2.5 A
 GLIBC_2.2.5 _Exit F
 GLIBC_2.2.5 _IO_2_1_stderr_ D 0xe0

--
			Gleb.

[-- Attachment #2: mt.cc --]
[-- Type: text/plain, Size: 634 bytes --]

#include <iostream>
#include <thread>
#include <vector>
#include <assert.h>

constexpr unsigned N = 3;

void func(int c) {
  cpu_set_t cpuset;
  CPU_ZERO(&cpuset);
  CPU_SET(c, &cpuset);
  int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
  assert(!rc);

  for (auto i = 0; i < 1000000; i++) {
    try {
      throw 42;
    } catch (...) {
    }
  }
}

int main() {
  std::vector<std::thread> threads;
  threads.reserve(N);

  for (unsigned i = 0; i < N; i++)
    threads.emplace_back(std::thread( [i] { func(i); } ));

  func(N);

  for (unsigned i = 0; i < N; i++)
    threads[i].join();

  return 0;
}



[-- Attachment #3: gcc.patch --]
[-- Type: text/plain, Size: 2097 bytes --]

commit fc040d3c4cd2b4b69cd07187090f2a250712c47a
Author: Gleb Natapov <gleb@scylladb.com>
Date:   Mon Jul 25 16:28:53 2016 +0300

    use dl_iterate_phdr_parallel

diff --git a/libgcc/unwind-dw2-fde-dip.c b/libgcc/unwind-dw2-fde-dip.c
index f7a1c3f..d3be0ad 100644
--- a/libgcc/unwind-dw2-fde-dip.c
+++ b/libgcc/unwind-dw2-fde-dip.c
@@ -120,7 +120,7 @@ struct unw_eh_frame_hdr
 
 #define FRAME_HDR_CACHE_SIZE 8
 
-static struct frame_hdr_cache_element
+static __thread struct frame_hdr_cache_element
 {
   _Unwind_Ptr pc_low;
   _Unwind_Ptr pc_high;
@@ -130,7 +130,7 @@ static struct frame_hdr_cache_element
   struct frame_hdr_cache_element *link;
 } frame_hdr_cache[FRAME_HDR_CACHE_SIZE];
 
-static struct frame_hdr_cache_element *frame_hdr_cache_head;
+static __thread struct frame_hdr_cache_element *frame_hdr_cache_head;
 
 /* Like base_of_encoded_value, but take the base from a struct
    unw_eh_callback_data instead of an _Unwind_Context.  */
@@ -195,7 +195,7 @@ _Unwind_IteratePhdrCallback (struct dl_phdr_info *info, size_t size, void *ptr)
 
   if (data->check_cache && size >= sizeof (struct ext_dl_phdr_info))
     {
-      static unsigned long long adds = -1ULL, subs;
+      static __thread unsigned long long adds = -1ULL, subs;
       struct ext_dl_phdr_info *einfo = (struct ext_dl_phdr_info *) info;
 
       /* We use a least recently used cache replacement policy.  Also,
@@ -445,6 +445,10 @@ _Unwind_IteratePhdrCallback (struct dl_phdr_info *info, size_t size, void *ptr)
   return 1;
 }
 
+extern int dl_iterate_phdr_parallel (int (*__callback) (struct dl_phdr_info *,
+                                                        size_t, void *),
+                                     void *__data);
+
 const fde *
 _Unwind_Find_FDE (void *pc, struct dwarf_eh_bases *bases)
 {
@@ -462,7 +466,7 @@ _Unwind_Find_FDE (void *pc, struct dwarf_eh_bases *bases)
   data.ret = NULL;
   data.check_cache = 1;
 
-  if (dl_iterate_phdr (_Unwind_IteratePhdrCallback, &data) < 0)
+  if (dl_iterate_phdr_parallel (_Unwind_IteratePhdrCallback, &data) < 0)
     return NULL;
 
   if (data.ret)

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2016-08-04 18:28 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-25 14:44 [PATCH RFC] introduce dl_iterate_phdr_parallel Gleb Natapov
2016-07-28 21:57 ` Adhemerval Zanella
2016-07-31 13:51   ` Gleb Natapov
2016-08-01 18:06     ` Adhemerval Zanella
2016-08-01 18:50       ` Gleb Natapov
2016-08-01 19:46         ` Torvald Riegel
2016-08-01 20:07           ` Gleb Natapov
2016-08-01 20:20             ` Torvald Riegel
2016-08-01 20:42               ` Gleb Natapov
2016-08-03 17:26                 ` Torvald Riegel
2016-08-03 17:47                   ` Gleb Natapov
2016-08-04 17:17                     ` Torvald Riegel
2016-08-04 17:37                       ` Gleb Natapov
2016-08-04 17:58                         ` Torvald Riegel
2016-08-04 18:28                           ` Gleb Natapov
2016-08-03 10:54           ` Florian Weimer
2016-08-03 14:00             ` Adhemerval Zanella
2016-08-03 14:31               ` Gleb Natapov
2016-08-03 16:12             ` Torvald Riegel
2016-08-01 20:24         ` Adhemerval Zanella
2016-08-02 10:47           ` Gleb Natapov
2016-08-02 14:16             ` Adhemerval Zanella
2016-08-02 14:55               ` Gleb Natapov
2016-08-02 15:31           ` Gleb Natapov
2016-08-01 20:26 ` Carlos O'Donell
2016-08-01 20:52   ` Gleb Natapov
2016-08-01 20:58     ` Carlos O'Donell
2016-08-02  6:46       ` Gleb Natapov
2016-08-02  9:56   ` Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).