From: Mark Wielaard
To: elfutils-devel@sourceware.org
Cc: Jonathon Anderson
Subject: [PATCH 2/2] libdw: Rewrite the memory handler to be more robust.
Date: Tue, 29 Oct 2019 21:15:00 -0000
Message-Id: <20191029211437.3268-2-mark@klomp.org>
In-Reply-To: <20191029211437.3268-1-mark@klomp.org>
References: <1572380520.19948.0@rice.edu> <20191029211437.3268-1-mark@klomp.org>

From: Jonathon Anderson

Pthread's thread-local variables are highly limited, which makes
it difficult to use many Dwarfs.  This replaces that with a less
efficient (or elegant) but more robust method.

Signed-off-by: Jonathon Anderson
---
 lib/atomics.h           |  2 ++
 libdw/ChangeLog         |  9 ++++++
 libdw/dwarf_begin_elf.c |  7 +++--
 libdw/dwarf_end.c       | 24 +++++++-------
 libdw/libdwP.h          | 67 +++++++++++++++++++--------------------
 libdw/libdw_alloc.c     | 69 ++++++++++++++++++++++++++++++++++++++---
 6 files changed, 125 insertions(+), 53 deletions(-)

diff --git a/lib/atomics.h b/lib/atomics.h
index ffd12f87..e3773759 100644
--- a/lib/atomics.h
+++ b/lib/atomics.h
@@ -31,7 +31,9 @@
 #if HAVE_STDATOMIC_H
 /* If possible, use the compiler's preferred atomics.  */
 # include <stdatomic.h>
+# include <threads.h>
 #else
 /* Otherwise, try to use the builtins provided by this compiler.  */
 # include "stdatomic-fbsd.h"
+# define thread_local __thread
 #endif /* HAVE_STDATOMIC_H */
diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 394c0df2..8de8a837 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,12 @@
+2019-10-28  Jonathon Anderson
+
+	* libdw_alloc.c: Added __libdw_alloc_tail.
+	(__libdw_allocate): Switch to use the mem_tails array.
+	* libdwP.h (Dwarf): Likewise.
+	* dwarf_begin_elf.c (dwarf_begin_elf): Support for above.
+	* dwarf_end.c (dwarf_end): Likewise.
+	* atomics.h: Add support for thread_local.
+
 2019-08-26  Jonathon Anderson
 
 	* libdw_alloc.c (__libdw_allocate): Added thread-safe stack allocator.
diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
index 8d137414..eadff13b 100644
--- a/libdw/dwarf_begin_elf.c
+++ b/libdw/dwarf_begin_elf.c
@@ -418,13 +418,14 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp)
      actual allocation.  */
   result->mem_default_size = mem_default_size;
   result->oom_handler = __libdw_oom;
-  if (pthread_key_create (&result->mem_key, NULL) != 0)
+  if (pthread_rwlock_init(&result->mem_rwl, NULL) != 0)
     {
       free (result);
-      __libdw_seterrno (DWARF_E_NOMEM); /* no memory or max pthread keys.  */
+      __libdw_seterrno (DWARF_E_NOMEM); /* no memory.  */
       return NULL;
     }
-  atomic_init (&result->mem_tail, (uintptr_t)NULL);
+  result->mem_stacks = 0;
+  result->mem_tails = NULL;
 
   if (cmd == DWARF_C_READ || cmd == DWARF_C_RDWR)
     {
diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
index a2e94436..3fd2836d 100644
--- a/libdw/dwarf_end.c
+++ b/libdw/dwarf_end.c
@@ -95,17 +95,19 @@ dwarf_end (Dwarf *dwarf)
       tdestroy (dwarf->split_tree, noop_free);
 
       /* Free the internally allocated memory.  */
-      struct libdw_memblock *memp;
-      memp = (struct libdw_memblock *) (atomic_load_explicit
-                                        (&dwarf->mem_tail,
-                                         memory_order_relaxed));
-      while (memp != NULL)
-        {
-          struct libdw_memblock *prevp = memp->prev;
-          free (memp);
-          memp = prevp;
-        }
-      pthread_key_delete (dwarf->mem_key);
+      for (size_t i = 0; i < dwarf->mem_stacks; i++)
+        {
+          struct libdw_memblock *memp = dwarf->mem_tails[i];
+          while (memp != NULL)
+            {
+              struct libdw_memblock *prevp = memp->prev;
+              free (memp);
+              memp = prevp;
+            }
+        }
+      if (dwarf->mem_tails != NULL)
+        free (dwarf->mem_tails);
+      pthread_rwlock_destroy (&dwarf->mem_rwl);
 
       /* Free the pubnames helper structure.  */
       free (dwarf->pubnames_sets);
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index ad2599eb..3e1ef59b 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -149,17 +149,6 @@ enum
 
 #include "dwarf_sig8_hash.h"
 
-/* Structure for internal memory handling.  This is basically a simplified
-   reimplementation of obstacks.  Unfortunately the standard obstack
-   implementation is not usable in libraries.  */
-struct libdw_memblock
-{
-  size_t size;
-  size_t remaining;
-  struct libdw_memblock *prev;
-  char mem[0];
-};
-
 /* This is the structure representing the debugging state.  */
 struct Dwarf
 {
@@ -231,11 +220,22 @@ struct Dwarf
   /* Similar for addrx/constx, which will come from .debug_addr section.  */
   struct Dwarf_CU *fake_addr_cu;
 
-  /* Internal memory handling.  Each thread allocates separately and only
-     allocates from its own blocks, while all the blocks are pushed atomically
-     onto a unified stack for easy deallocation.  */
-  pthread_key_t mem_key;
-  atomic_uintptr_t mem_tail;
+  /* Supporting lock for internal memory handling.  Ensures threads that have
+     an entry in the mem_tails array are not disturbed by new threads doing
+     allocations for this Dwarf.  */
+  pthread_rwlock_t mem_rwl;
+
+  /* Internal memory handling.  This is basically a simplified thread-local
+     reimplementation of obstacks.  Unfortunately the standard obstack
+     implementation is not usable in libraries.  */
+  size_t mem_stacks;
+  struct libdw_memblock
+  {
+    size_t size;
+    size_t remaining;
+    struct libdw_memblock *prev;
+    char mem[0];
+  } **mem_tails;
 
   /* Default size of allocated memory blocks.  */
   size_t mem_default_size;
@@ -578,34 +578,31 @@ libdw_valid_user_form (int form)
 extern void __libdw_seterrno (int value) internal_function;
 
 
-/* Memory handling, the easy parts.  This macro does not do nor need to do any
-   locking for proper concurrent operation.  */
+/* Memory handling, the easy parts.  */
 #define libdw_alloc(dbg, type, tsize, cnt) \
-  ({ struct libdw_memblock *_tail = pthread_getspecific (dbg->mem_key);      \
-     size_t _req = (tsize) * (cnt);                                          \
-     type *_result;                                                          \
-     if (unlikely (_tail == NULL))                                           \
-       _result = (type *) __libdw_allocate (dbg, _req, __alignof (type));    \
+  ({ struct libdw_memblock *_tail = __libdw_alloc_tail(dbg);                 \
+     size_t _required = (tsize) * (cnt);                                     \
+     type *_result = (type *) (_tail->mem + (_tail->size - _tail->remaining));\
+     size_t _padding = ((__alignof (type)                                    \
+                         - ((uintptr_t) _result & (__alignof (type) - 1)))   \
+                        & (__alignof (type) - 1));                           \
+     if (unlikely (_tail->remaining < _required + _padding))                 \
+       _result = (type *) __libdw_allocate (dbg, _required, __alignof (type));\
      else                                                                    \
       {                                                                      \
-        _result = (type *) (_tail->mem + (_tail->size - _tail->remaining));  \
-        size_t _padding = ((__alignof (type)                                 \
-                            - ((uintptr_t) _result & (__alignof (type) - 1))) \
-                           & (__alignof (type) - 1));                        \
-        if (unlikely (_tail->remaining < _req + _padding))                   \
-          _result = (type *) __libdw_allocate (dbg, _req, __alignof (type)); \
-        else                                                                 \
-          {                                                                  \
-            _req += _padding;                                                \
-            _result = (type *) ((char *) _result + _padding);                \
-            _tail->remaining -= _req;                                        \
-          }                                                                  \
+        _required += _padding;                                               \
+        _result = (type *) ((char *) _result + _padding);                    \
+        _tail->remaining -= _required;                                       \
       }                                                                      \
      _result; })
 
 #define libdw_typed_alloc(dbg, type) \
   libdw_alloc (dbg, type, sizeof (type), 1)
 
+/* Callback to choose a thread-local memory allocation stack.  */
+extern struct libdw_memblock *__libdw_alloc_tail (Dwarf* dbg)
+     __nonnull_attribute__ (1);
+
 /* Callback to allocate more.  */
 extern void *__libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
      __attribute__ ((__malloc__)) __nonnull_attribute__ (1);
diff --git a/libdw/libdw_alloc.c b/libdw/libdw_alloc.c
index f2e74d18..86ca7032 100644
--- a/libdw/libdw_alloc.c
+++ b/libdw/libdw_alloc.c
@@ -35,7 +35,68 @@
 #include <stdlib.h>
 #include "libdwP.h"
 #include "system.h"
+#include "atomics.h"
+#if USE_VG_ANNOTATIONS == 1
+#include
+#include
+#else
+#define ANNOTATE_HAPPENS_BEFORE(X)
+#define ANNOTATE_HAPPENS_AFTER(X)
+#endif
+#define THREAD_ID_UNSET ((size_t) -1)
+static thread_local size_t thread_id = THREAD_ID_UNSET;
+static atomic_size_t next_id = ATOMIC_VAR_INIT(0);
+
+struct libdw_memblock *
+__libdw_alloc_tail (Dwarf *dbg)
+{
+  if (thread_id == THREAD_ID_UNSET)
+    thread_id = atomic_fetch_add (&next_id, 1);
+
+  pthread_rwlock_rdlock (&dbg->mem_rwl);
+  if (thread_id >= dbg->mem_stacks)
+    {
+      pthread_rwlock_unlock (&dbg->mem_rwl);
+      pthread_rwlock_wrlock (&dbg->mem_rwl);
+
+      /* Another thread may have already reallocated. In theory using an
+         atomic would be faster, but given that this only happens once per
+         thread per Dwarf, some minor slowdown should be fine.  */
+      if (thread_id >= dbg->mem_stacks)
+        {
+          dbg->mem_tails = realloc (dbg->mem_tails, (thread_id+1)
+                                    * sizeof (struct libdw_memblock *));
+          if (dbg->mem_tails == NULL)
+            {
+              pthread_rwlock_unlock (&dbg->mem_rwl);
+              dbg->oom_handler();
+            }
+          for (size_t i = dbg->mem_stacks; i <= thread_id; i++)
+            dbg->mem_tails[i] = NULL;
+          dbg->mem_stacks = thread_id + 1;
+          ANNOTATE_HAPPENS_BEFORE (&dbg->mem_tails);
+        }
+
+      pthread_rwlock_unlock (&dbg->mem_rwl);
+      pthread_rwlock_rdlock (&dbg->mem_rwl);
+    }
+
+  /* At this point, we have an entry in the tail array.  */
+  ANNOTATE_HAPPENS_AFTER (&dbg->mem_tails);
+  struct libdw_memblock *result = dbg->mem_tails[thread_id];
+  if (result == NULL)
+    {
+      result = malloc (dbg->mem_default_size);
+      result->size = dbg->mem_default_size
+                     - offsetof (struct libdw_memblock, mem);
+      result->remaining = result->size;
+      result->prev = NULL;
+      dbg->mem_tails[thread_id] = result;
+    }
+  pthread_rwlock_unlock (&dbg->mem_rwl);
+  return result;
+}
 
 
 void *
 __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
@@ -52,10 +113,10 @@ __libdw_allocate (Dwarf *dbg, size_t minsize, size_t align)
   newp->size = size - offsetof (struct libdw_memblock, mem);
   newp->remaining = (uintptr_t) newp + size - (result + minsize);
 
-  newp->prev = (struct libdw_memblock*)atomic_exchange_explicit(
-    &dbg->mem_tail, (uintptr_t)newp, memory_order_relaxed);
-  if (pthread_setspecific (dbg->mem_key, newp) != 0)
-    dbg->oom_handler ();
+  pthread_rwlock_rdlock (&dbg->mem_rwl);
+  newp->prev = dbg->mem_tails[thread_id];
+  dbg->mem_tails[thread_id] = newp;
+  pthread_rwlock_unlock (&dbg->mem_rwl);
 
   return (void *) result;
 }
-- 
2.20.1
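
For readers who want to see the allocation scheme above in isolation, the following
is a minimal standalone sketch in plain C11; it is not elfutils code and not part of
the patch.  It shows the same idea: a global atomic counter hands each thread a
stable index, and a per-object read/write lock protects growth of an array that
holds one stack per thread.  All names here (toy_dwarf, ensure_slot, push) are made
up for illustration, error handling is reduced to abort(), and it builds with
something like "cc -std=c11 -pthread sketch.c".

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define ID_UNSET ((size_t) -1)

/* Lazily assigned per-thread index, handed out by a global atomic counter.  */
static _Thread_local size_t thread_idx = ID_UNSET;
static atomic_size_t next_idx;

/* Toy per-object state: a growable array with one block stack per thread.  */
struct node { struct node *prev; int payload; };
struct toy_dwarf
{
  pthread_rwlock_t lock;  /* guards growth of the tails array */
  size_t nstacks;         /* current length of tails */
  struct node **tails;    /* tails[i] = top of thread i's stack */
};

/* Make sure tails[] has a slot for the calling thread: take the read lock for
   the common case, upgrade to the write lock only when a new thread has to
   grow the array, re-check, then drop back to normal operation.  */
static void
ensure_slot (struct toy_dwarf *d)
{
  if (thread_idx == ID_UNSET)
    thread_idx = atomic_fetch_add (&next_idx, 1);

  pthread_rwlock_rdlock (&d->lock);
  if (thread_idx >= d->nstacks)
    {
      pthread_rwlock_unlock (&d->lock);
      pthread_rwlock_wrlock (&d->lock);
      if (thread_idx >= d->nstacks)   /* another thread may have grown it */
        {
          struct node **p = realloc (d->tails, (thread_idx + 1) * sizeof *p);
          if (p == NULL)
            abort ();
          for (size_t i = d->nstacks; i <= thread_idx; i++)
            p[i] = NULL;
          d->tails = p;
          d->nstacks = thread_idx + 1;
        }
      pthread_rwlock_unlock (&d->lock);
      pthread_rwlock_rdlock (&d->lock);
    }
  pthread_rwlock_unlock (&d->lock);
}

/* Push onto the calling thread's private stack.  Only the owning thread ever
   writes its slot, so holding the read lock is enough to keep realloc away.  */
static void
push (struct toy_dwarf *d, int payload)
{
  ensure_slot (d);
  struct node *n = malloc (sizeof *n);
  if (n == NULL)
    abort ();
  n->payload = payload;
  pthread_rwlock_rdlock (&d->lock);
  n->prev = d->tails[thread_idx];
  d->tails[thread_idx] = n;
  pthread_rwlock_unlock (&d->lock);
}

static void *
worker (void *arg)
{
  struct toy_dwarf *d = arg;
  for (int i = 0; i < 1000; i++)
    push (d, i);
  return NULL;
}

int
main (void)
{
  struct toy_dwarf d = { .nstacks = 0, .tails = NULL };
  pthread_rwlock_init (&d.lock, NULL);

  pthread_t t[8];
  for (int i = 0; i < 8; i++)
    pthread_create (&t[i], NULL, worker, &d);
  for (int i = 0; i < 8; i++)
    pthread_join (t[i], NULL);

  /* Single-threaded teardown, as in dwarf_end: walk every stack and free.  */
  size_t total = 0;
  for (size_t i = 0; i < d.nstacks; i++)
    for (struct node *n = d.tails[i]; n != NULL; total++)
      {
        struct node *prev = n->prev;
        free (n);
        n = prev;
      }
  free (d.tails);
  pthread_rwlock_destroy (&d.lock);
  printf ("freed %zu nodes from %zu stacks\n", total, d.nstacks);
  return 0;
}

The read lock suffices on the fast path because a thread only ever touches its own
slot; the write lock is taken once per new thread per object, which matches the
patch's expectation that the rwlock is essentially uncontended in practice.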