From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 19318 invoked by alias); 27 Oct 2006 08:35:17 -0000 Received: (qmail 19301 invoked by uid 22791); 27 Oct 2006 08:35:17 -0000 X-Spam-Check-By: sourceware.org Received: from sunsite.ms.mff.cuni.cz (HELO sunsite.mff.cuni.cz) (195.113.15.26) by sourceware.org (qpsmtpd/0.31) with ESMTP; Fri, 27 Oct 2006 08:35:11 +0000 Received: from sunsite.mff.cuni.cz (sunsite.mff.cuni.cz [127.0.0.1]) by sunsite.mff.cuni.cz (8.13.1/8.13.1) with ESMTP id k9R8YoX1015593; Fri, 27 Oct 2006 10:34:50 +0200 Received: (from jj@localhost) by sunsite.mff.cuni.cz (8.13.1/8.13.1/Submit) id k9R8Yo0e015592; Fri, 27 Oct 2006 10:34:50 +0200 Date: Fri, 27 Oct 2006 08:35:00 -0000 From: Jakub Jelinek To: Ulrich Drepper Cc: Glibc hackers Subject: [PATCH] Speed up _dl_{,profile_}fixup Message-ID: <20061027083450.GQ5868@sunsite.mff.cuni.cz> Reply-To: Jakub Jelinek Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.4.1i Mailing-List: contact libc-hacker-help@sourceware.org; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-hacker-owner@sourceware.org X-SW-Source: 2006-10/txt/msg00019.txt.bz2 Hi! The 4 atomic instructions in _dl_fixup common case (iff lookup up something in dlopened library and pthread_create has been already called) are noticeable, this patch changes it to only 2 in the common case, at the expense that dlopen or dlclose might now wait slightly longer (until all currently pending lazy binding lookups and/or dl{,v}sym calls finish). 2006-10-27 Jakub Jelinek * elf/dl-libc.c: Revert l_scope name changes. * elf/dl-load.c: Likewise. * elf/dl-object.c: Likewise. * elf/rtld.c: Likewise. * elf/dl-close.c (_dl_close): Likewise. * elf/dl-open.c (dl_open_worker): Likewise. If not SINGLE_THREAD_P, always use __rtld_mrlock_{change,done}. Always free old scope list here if not l_scope_mem. * elf/dl-runtime.c (_dl_fixup, _dl_profile_fixup): Revert l_scope name change. Never free scope list here. Just __rtld_mrlock_lock before the lookup and __rtld_mrlock_unlock it after the lookup. * elf/dl-sym.c: Likewise. * include/link.h (struct r_scoperec): Remove. (struct link_map): Replace l_scoperec with l_scope, l_scoperec_mem with l_scope_mem and l_scoperec_lock with l_scope_lock. --- libc/elf/dl-close.c.jj 2006-10-19 17:28:01.000000000 +0200 +++ libc/elf/dl-close.c 2006-10-26 15:17:59.000000000 +0200 @@ -343,14 +343,14 @@ _dl_close (void *_map) one for the terminating NULL pointer. */ size_t remain = (new_list != NULL) + 1; bool removed_any = false; - for (size_t cnt = 0; imap->l_scoperec->scope[cnt] != NULL; ++cnt) + for (size_t cnt = 0; imap->l_scope[cnt] != NULL; ++cnt) /* This relies on l_scope[] entries being always set either to its own l_symbolic_searchlist address, or some map's l_searchlist address. */ - if (imap->l_scoperec->scope[cnt] != &imap->l_symbolic_searchlist) + if (imap->l_scope[cnt] != &imap->l_symbolic_searchlist) { struct link_map *tmap = (struct link_map *) - ((char *) imap->l_scoperec->scope[cnt] + ((char *) imap->l_scope[cnt] - offsetof (struct link_map, l_searchlist)); assert (tmap->l_ns == ns); if (tmap->l_idx == IDX_STILL_USED) @@ -368,38 +368,35 @@ _dl_close (void *_map) user of the current array. If possible use the link map's memory. */ size_t new_size; - struct r_scoperec *newp; - if (imap->l_scoperec != &imap->l_scoperec_mem - && remain < NINIT_SCOPE_ELEMS (imap) - && imap->l_scoperec_mem.nusers == 0) + struct r_scope_elem **newp; + +#define SCOPE_ELEMS(imap) \ + (sizeof (imap->l_scope_mem) / sizeof (imap->l_scope_mem[0])) + + if (imap->l_scope != imap->l_scope_mem + && remain < SCOPE_ELEMS (imap)) { - new_size = NINIT_SCOPE_ELEMS (imap); - newp = &imap->l_scoperec_mem; + new_size = SCOPE_ELEMS (imap); + newp = imap->l_scope_mem; } else { new_size = imap->l_scope_max; - newp = (struct r_scoperec *) - malloc (sizeof (struct r_scoperec) - + new_size * sizeof (struct r_scope_elem *)); + newp = (struct r_scope_elem **) + malloc (new_size * sizeof (struct r_scope_elem *)); if (newp == NULL) _dl_signal_error (ENOMEM, "dlclose", NULL, N_("cannot create scope list")); } - newp->nusers = 0; - newp->remove_after_use = false; - newp->notify = false; - /* Copy over the remaining scope elements. */ remain = 0; - for (size_t cnt = 0; imap->l_scoperec->scope[cnt] != NULL; ++cnt) + for (size_t cnt = 0; imap->l_scope[cnt] != NULL; ++cnt) { - if (imap->l_scoperec->scope[cnt] - != &imap->l_symbolic_searchlist) + if (imap->l_scope[cnt] != &imap->l_symbolic_searchlist) { struct link_map *tmap = (struct link_map *) - ((char *) imap->l_scoperec->scope[cnt] + ((char *) imap->l_scope[cnt] - offsetof (struct link_map, l_searchlist)); if (tmap->l_idx != IDX_STILL_USED) { @@ -407,38 +404,30 @@ _dl_close (void *_map) scope. */ if (new_list != NULL) { - newp->scope[remain++] = new_list; + newp[remain++] = new_list; new_list = NULL; } continue; } } - newp->scope[remain++] = imap->l_scoperec->scope[cnt]; + newp[remain++] = imap->l_scope[cnt]; } - newp->scope[remain] = NULL; + newp[remain] = NULL; - struct r_scoperec *old = imap->l_scoperec; + struct r_scope_elem **old = imap->l_scope; if (SINGLE_THREAD_P) - imap->l_scoperec = newp; + imap->l_scope = newp; else { - __rtld_mrlock_change (imap->l_scoperec_lock); - imap->l_scoperec = newp; - __rtld_mrlock_done (imap->l_scoperec_lock); - - if (atomic_increment_val (&old->nusers) != 1) - { - old->remove_after_use = true; - old->notify = true; - if (atomic_decrement_val (&old->nusers) != 0) - __rtld_waitzero (old->nusers); - } + __rtld_mrlock_change (imap->l_scope_lock); + imap->l_scope = newp; + __rtld_mrlock_done (imap->l_scope_lock); } /* No user anymore, we can free it now. */ - if (old != &imap->l_scoperec_mem) + if (old != imap->l_scope_mem) free (old); imap->l_scope_max = new_size; @@ -652,8 +641,8 @@ _dl_close (void *_map) free (imap->l_initfini); /* Remove the scope array if we allocated it. */ - if (imap->l_scoperec != &imap->l_scoperec_mem) - free (imap->l_scoperec); + if (imap->l_scope != imap->l_scope_mem) + free (imap->l_scope); if (imap->l_phdr_allocated) free ((void *) imap->l_phdr); --- libc/elf/dl-runtime.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-runtime.c 2006-10-26 14:24:18.000000000 +0200 @@ -93,29 +93,15 @@ _dl_fixup ( version = NULL; } - struct r_scoperec *scoperec = l->l_scoperec; if (l->l_type == lt_loaded && !SINGLE_THREAD_P) - { - __rtld_mrlock_lock (l->l_scoperec_lock); - scoperec = l->l_scoperec; - atomic_increment (&scoperec->nusers); - __rtld_mrlock_unlock (l->l_scoperec_lock); - } + __rtld_mrlock_lock (l->l_scope_lock); result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, - scoperec->scope, version, - ELF_RTYPE_CLASS_PLT, + l->l_scope, version, ELF_RTYPE_CLASS_PLT, DL_LOOKUP_ADD_DEPENDENCY, NULL); - if (l->l_type == lt_loaded && !SINGLE_THREAD_P - && atomic_decrement_val (&scoperec->nusers) == 0 - && __builtin_expect (scoperec->remove_after_use, 0)) - { - if (scoperec->notify) - __rtld_notify (scoperec->nusers); - else - free (scoperec); - } + if (l->l_type == lt_loaded && !SINGLE_THREAD_P) + __rtld_mrlock_unlock (l->l_scope_lock); /* Currently result contains the base load address (or link map) of the object that defines sym. Now add in the symbol @@ -195,29 +181,16 @@ _dl_profile_fixup ( version = NULL; } - struct r_scoperec *scoperec = l->l_scoperec; if (l->l_type == lt_loaded && !SINGLE_THREAD_P) - { - __rtld_mrlock_lock (l->l_scoperec_lock); - scoperec = l->l_scoperec; - atomic_increment (&scoperec->nusers); - __rtld_mrlock_unlock (l->l_scoperec_lock); - } + __rtld_mrlock_lock (l->l_scope_lock); result = _dl_lookup_symbol_x (strtab + refsym->st_name, l, &defsym, - scoperec->scope, version, + l->l_scope, version, ELF_RTYPE_CLASS_PLT, DL_LOOKUP_ADD_DEPENDENCY, NULL); - if (l->l_type == lt_loaded && !SINGLE_THREAD_P - && atomic_decrement_val (&scoperec->nusers) == 0 - && __builtin_expect (scoperec->remove_after_use, 0)) - { - if (scoperec->notify) - __rtld_notify (scoperec->nusers); - else - free (scoperec); - } + if (l->l_type == lt_loaded && !SINGLE_THREAD_P) + __rtld_mrlock_unlock (l->l_scope_lock); /* Currently result contains the base load address (or link map) of the object that defines sym. Now add in the symbol --- libc/elf/dl-libc.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-libc.c 2006-10-26 14:19:53.000000000 +0200 @@ -133,8 +133,7 @@ do_dlsym_private (void *ptr) struct do_dlsym_args *args = (struct do_dlsym_args *) ptr; args->ref = NULL; l = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref, - args->map->l_scoperec->scope, &vers, 0, 0, - NULL); + args->map->l_scope, &vers, 0, 0, NULL); args->loadbase = l; } --- libc/elf/dl-sym.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-sym.c 2006-10-26 14:29:14.000000000 +0200 @@ -65,7 +65,6 @@ struct call_dl_lookup_args /* Arguments to do_dlsym. */ struct link_map *map; const char *name; - struct r_scope_elem **scope; struct r_found_version *vers; int flags; @@ -79,7 +78,7 @@ call_dl_lookup (void *ptr) { struct call_dl_lookup_args *args = (struct call_dl_lookup_args *) ptr; args->map = GLRO(dl_lookup_symbol_x) (args->name, args->map, args->refp, - args->scope, args->vers, 0, + args->map->l_scope, args->vers, 0, args->flags, NULL); } @@ -118,20 +117,16 @@ do_sym (void *handle, const char *name, array can change. */ if (match->l_type != lt_loaded || SINGLE_THREAD_P) result = GLRO(dl_lookup_symbol_x) (name, match, &ref, - match->l_scoperec->scope, vers, 0, + match->l_scope, vers, 0, flags | DL_LOOKUP_ADD_DEPENDENCY, NULL); else { - __rtld_mrlock_lock (match->l_scoperec_lock); - struct r_scoperec *scoperec = match->l_scoperec; - atomic_increment (&scoperec->nusers); - __rtld_mrlock_unlock (match->l_scoperec_lock); + __rtld_mrlock_lock (match->l_scope_lock); struct call_dl_lookup_args args; args.name = name; args.map = match; - args.scope = scoperec->scope; args.vers = vers; args.flags = flags | DL_LOOKUP_ADD_DEPENDENCY; args.refp = &ref; @@ -142,14 +137,7 @@ do_sym (void *handle, const char *name, int err = GLRO(dl_catch_error) (&objname, &errstring, &malloced, call_dl_lookup, &args); - if (atomic_decrement_val (&scoperec->nusers) == 0 - && __builtin_expect (scoperec->remove_after_use, 0)) - { - if (scoperec->notify) - __rtld_notify (scoperec->nusers); - else - free (scoperec); - } + __rtld_mrlock_unlock (match->l_scope_lock); if (__builtin_expect (errstring != NULL, 0)) { --- libc/elf/dl-open.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-open.c 2006-10-26 14:40:00.000000000 +0200 @@ -344,7 +344,7 @@ dl_open_worker (void *a) start the profiling. */ struct link_map *old_profile_map = GL(dl_profile_map); - _dl_relocate_object (l, l->l_scoperec->scope, 1, 1); + _dl_relocate_object (l, l->l_scope, 1, 1); if (old_profile_map == NULL && GL(dl_profile_map) != NULL) { @@ -357,7 +357,7 @@ dl_open_worker (void *a) } else #endif - _dl_relocate_object (l, l->l_scoperec->scope, lazy, 0); + _dl_relocate_object (l, l->l_scope, lazy, 0); } if (l == new) @@ -375,7 +375,7 @@ dl_open_worker (void *a) not been loaded here and now. */ if (imap->l_init_called && imap->l_type == lt_loaded) { - struct r_scope_elem **runp = imap->l_scoperec->scope; + struct r_scope_elem **runp = imap->l_scope; size_t cnt = 0; while (*runp != NULL) @@ -395,62 +395,51 @@ dl_open_worker (void *a) /* The 'r_scope' array is too small. Allocate a new one dynamically. */ size_t new_size; - struct r_scoperec *newp; + struct r_scope_elem **newp; - if (imap->l_scoperec != &imap->l_scoperec_mem - && imap->l_scope_max < NINIT_SCOPE_ELEMS (imap) - && imap->l_scoperec_mem.nusers == 0) +#define SCOPE_ELEMS(imap) \ + (sizeof (imap->l_scope_mem) / sizeof (imap->l_scope_mem[0])) + + if (imap->l_scope != imap->l_scope_mem + && imap->l_scope_max < SCOPE_ELEMS (imap)) { - new_size = NINIT_SCOPE_ELEMS (imap); - newp = &imap->l_scoperec_mem; + new_size = SCOPE_ELEMS (imap); + newp = imap->l_scope_mem; } else { new_size = imap->l_scope_max * 2; - newp = (struct r_scoperec *) - malloc (sizeof (struct r_scoperec) - + new_size * sizeof (struct r_scope_elem *)); + newp = (struct r_scope_elem **) + malloc (new_size * sizeof (struct r_scope_elem *)); if (newp == NULL) _dl_signal_error (ENOMEM, "dlopen", NULL, N_("cannot create scope list")); } - newp->nusers = 0; - newp->remove_after_use = false; - newp->notify = false; - memcpy (newp->scope, imap->l_scoperec->scope, - cnt * sizeof (imap->l_scoperec->scope[0])); - struct r_scoperec *old = imap->l_scoperec; - - if (old == &imap->l_scoperec_mem) - imap->l_scoperec = newp; - else if (SINGLE_THREAD_P) - { - imap->l_scoperec = newp; - free (old); - } + memcpy (newp, imap->l_scope, cnt * sizeof (imap->l_scope[0])); + struct r_scope_elem **old = imap->l_scope; + + if (SINGLE_THREAD_P) + imap->l_scope = newp; else { - __rtld_mrlock_change (imap->l_scoperec_lock); - imap->l_scoperec = newp; - __rtld_mrlock_done (imap->l_scoperec_lock); - - atomic_increment (&old->nusers); - old->remove_after_use = true; - if (atomic_decrement_val (&old->nusers) == 0) - /* No user, we can free it here and now. */ - free (old); + __rtld_mrlock_change (imap->l_scope_lock); + imap->l_scope = newp; + __rtld_mrlock_done (imap->l_scope_lock); } + if (old != imap->l_scope_mem) + free (old); + imap->l_scope_max = new_size; } /* First terminate the extended list. Otherwise a thread might use the new last element and then use the garbage at offset IDX+1. */ - imap->l_scoperec->scope[cnt + 1] = NULL; + imap->l_scope[cnt + 1] = NULL; atomic_write_barrier (); - imap->l_scoperec->scope[cnt] = &new->l_searchlist; + imap->l_scope[cnt] = &new->l_searchlist; } #if USE_TLS /* Only add TLS memory if this object is loaded now and --- libc/elf/dl-object.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-object.c 2006-10-26 14:18:55.000000000 +0200 @@ -82,13 +82,12 @@ _dl_new_object (char *realname, const ch /* Use the 'l_scope_mem' array by default for the the 'l_scope' information. If we need more entries we will allocate a large array dynamically. */ - new->l_scoperec = &new->l_scoperec_mem; - new->l_scope_max = (sizeof (new->l_scope_realmem.scope_elems) - / sizeof (new->l_scope_realmem.scope_elems[0])); + new->l_scope = new->l_scope_mem; + new->l_scope_max = sizeof (new->l_scope_mem) / sizeof (new->l_scope_mem[0]); /* No need to initialize the scope lock if the initializer is zero. */ #if _RTLD_MRLOCK_INITIALIZER != 0 - __rtld_mrlock_initialize (new->l_scoperec_mem.lock); + __rtld_mrlock_initialize (new->l_scope_lock); #endif /* Counter for the scopes we have to handle. */ @@ -104,8 +103,7 @@ _dl_new_object (char *realname, const ch l->l_next = new; /* Add the global scope. */ - new->l_scoperec->scope[idx++] - = &GL(dl_ns)[nsid]._ns_loaded->l_searchlist; + new->l_scope[idx++] = &GL(dl_ns)[nsid]._ns_loaded->l_searchlist; } else GL(dl_ns)[nsid]._ns_loaded = new; @@ -121,15 +119,15 @@ _dl_new_object (char *realname, const ch loader = loader->l_loader; /* Insert the scope if it isn't the global scope we already added. */ - if (idx == 0 || &loader->l_searchlist != new->l_scoperec->scope[0]) + if (idx == 0 || &loader->l_searchlist != new->l_scope[0]) { if ((mode & RTLD_DEEPBIND) != 0 && idx != 0) { - new->l_scoperec->scope[1] = new->l_scoperec->scope[0]; + new->l_scope[1] = new->l_scope[0]; idx = 0; } - new->l_scoperec->scope[idx] = &loader->l_searchlist; + new->l_scope[idx] = &loader->l_searchlist; } new->l_local_scope[0] = &new->l_searchlist; --- libc/elf/rtld.c.jj 2006-10-19 17:26:37.000000000 +0200 +++ libc/elf/rtld.c 2006-10-26 14:15:17.000000000 +0200 @@ -609,7 +609,7 @@ relocate_doit (void *a) { struct relocate_args *args = (struct relocate_args *) a; - _dl_relocate_object (args->l, args->l->l_scoperec->scope, args->lazy, 0); + _dl_relocate_object (args->l, args->l->l_scope, args->lazy, 0); } static void @@ -1963,7 +1963,7 @@ ERROR: ld.so: object '%s' cannot be load lookup_t result; result = _dl_lookup_symbol_x (INTUSE(_dl_argv)[i], main_map, - &ref, main_map->l_scoperec->scope, + &ref, main_map->l_scope, NULL, ELF_RTYPE_CLASS_PLT, DL_LOOKUP_ADD_DEPENDENCY, NULL); @@ -2007,7 +2007,7 @@ ERROR: ld.so: object '%s' cannot be load /* Mark the link map as not yet relocated again. */ GL(dl_rtld_map).l_relocated = 0; _dl_relocate_object (&GL(dl_rtld_map), - main_map->l_scoperec->scope, 0, 0); + main_map->l_scope, 0, 0); } } #define VERNEEDTAG (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGIDX (DT_VERNEED)) @@ -2227,7 +2227,7 @@ ERROR: ld.so: object '%s' cannot be load } if (l != &GL(dl_rtld_map)) - _dl_relocate_object (l, l->l_scoperec->scope, GLRO(dl_lazy), + _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy), consider_profiling); #ifdef USE_TLS @@ -2303,8 +2303,7 @@ ERROR: ld.so: object '%s' cannot be load HP_TIMING_NOW (start); /* Mark the link map as not yet relocated again. */ GL(dl_rtld_map).l_relocated = 0; - _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scoperec->scope, - 0, 0); + _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); HP_TIMING_NOW (stop); HP_TIMING_DIFF (add, start, stop); HP_TIMING_ACCUM_NT (relocate_time, add); --- libc/elf/dl-load.c.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/elf/dl-load.c 2006-10-26 14:20:38.000000000 +0200 @@ -1473,7 +1473,7 @@ cannot enable executable stack as shared have to do this for the main map. */ if ((mode & RTLD_DEEPBIND) == 0 && __builtin_expect (l->l_info[DT_SYMBOLIC] != NULL, 0) - && &l->l_searchlist != l->l_scoperec->scope[0]) + && &l->l_searchlist != l->l_scope[0]) { /* Create an appropriate searchlist. It contains only this map. This is the definition of DT_SYMBOLIC in SysVr4. */ @@ -1490,11 +1490,11 @@ cannot enable executable stack as shared l->l_symbolic_searchlist.r_nlist = 1; /* Now move the existing entries one back. */ - memmove (&l->l_scoperec->scope[1], &l->l_scoperec->scope[0], - (l->l_scope_max - 1) * sizeof (l->l_scoperec->scope[0])); + memmove (&l->l_scope[1], &l->l_scope[0], + (l->l_scope_max - 1) * sizeof (l->l_scope[0])); /* Now add the new entry. */ - l->l_scoperec->scope[0] = &l->l_symbolic_searchlist; + l->l_scope[0] = &l->l_symbolic_searchlist; } /* Remember whether this object must be initialized first. */ --- libc/include/link.h.jj 2006-10-19 17:28:02.000000000 +0200 +++ libc/include/link.h 2006-10-26 14:00:56.000000000 +0200 @@ -75,18 +75,6 @@ struct r_search_path_struct }; -/* Structure for a scope. Each such data structure has a lock. The - lock allows many readers. It can be invalidated by setting bit 31 - which means that no more lockers are allowe */ -struct r_scoperec -{ - bool remove_after_use; - bool notify; - int nusers; - struct r_scope_elem *scope[0]; -}; - - /* Structure describing a loaded shared object. The `l_next' and `l_prev' members form a chain of all the shared objects loaded at startup. @@ -226,27 +214,14 @@ struct link_map ElfW(Addr) l_text_end; /* Default array for 'l_scope'. */ - union - { - struct r_scoperec l_scoperec_mem; - struct - { - struct r_scoperec scoperec_struct; - /* XXX This number should be increased once the scope memory - handling has been tested. */ - struct r_scope_elem *scope_elems[4]; -#define NINIT_SCOPE_ELEMS(map) \ - (sizeof ((map)->l_scope_realmem.scope_elems) \ - / sizeof ((map)->l_scope_realmem.scope_elems[0])) - } l_scope_realmem; - }; + struct r_scope_elem *l_scope_mem[4]; /* Size of array allocated for 'l_scope'. */ size_t l_scope_max; /* This is an array defining the lookup scope for this link map. There are initially at most three different scope lists. */ - struct r_scoperec *l_scoperec; + struct r_scope_elem **l_scope; /* We need to protect using the SCOPEREC. */ - __rtld_mrlock_define (, l_scoperec_lock) + __rtld_mrlock_define (, l_scope_lock) /* A similar array, this time only with the local scope. This is used occasionally. */ Jakub