* [PATCH] Speed up ___tls_get_addr
@ 2002-11-10 23:37 Jakub Jelinek
2002-11-11 0:48 ` Roland McGrath
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2002-11-10 23:37 UTC (permalink / raw)
To: Glibc hackers
[-- Attachment #1: Type: text/plain, Size: 78 bytes --]
Hi!
Sending here just to get it archived.
Let's get back to it later.
Jakub
[-- Attachment #2: P1 --]
[-- Type: text/plain, Size: 13086 bytes --]
2002-11-10 Jakub Jelinek <jakub@redhat.com>
* sysdeps/generic/dl-tls.c (allocate_and_init): Change argument to
modid. Move part of code from __tls_get_addr here.
(tls_new_generation): New function. Moved from __tls_get_addr.
(__tls_get_addr): Move the slow path to separate functions.
--- libc/sysdeps/generic/dl-tls.c.jj 2002-11-05 23:10:28.000000000 +0100
+++ libc/sysdeps/generic/dl-tls.c 2002-11-09 22:17:31.000000000 +0100
@@ -421,10 +421,22 @@ _dl_tls_symaddr (struct link_map *map, c
static void *
-allocate_and_init (struct link_map *map)
+__attribute_noinline__ __attribute_used__
+allocate_and_init (size_t modid)
{
+ struct link_map *map;
+ size_t idx = modid;
void *newp;
+ dtv_t *dtv = THREAD_DTV ();
+ struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+ /* Find the link map for this module. */
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
+ map = listp->slotinfo[idx].map;
newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
if (newp == NULL)
oom ();
@@ -433,9 +445,158 @@ allocate_and_init (struct link_map *map)
memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+ dtv[modid].pointer = newp;
return newp;
}
+static dtv_t *
+__attribute_noinline__ __attribute_used__
+tls_new_generation (dtv_t *dtv, size_t modid)
+{
+ struct dtv_slotinfo_list *listp;
+ size_t idx;
+
+ /* The global dl_tls_dtv_slotinfo array contains for each module
+ index the generation counter current when the entry was
+ created. This array never shrinks so that all module indices
+ which were valid at some time can be used to access it.
+ Before the first use of a new module index in this function
+ the array was extended appropriately. Access also does not
+ have to be guarded against modifications of the array. It is
+ assumed that pointer-size values can be read atomically even
+ in SMP environments. It is possible that other threads at
+ the same time dynamically load code and therefore add to the
+ slotinfo list. This is a problem since we must not pick up
+ any information about incomplete work. The solution to this
+ is to ignore all dtv slots which were created after the one
+ we are currently interested in. We know that dynamic loading
+ for this module is completed and this is the last load
+ operation we know finished. */
+ idx = modid;
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
+
+ if (dtv[0].counter < listp->slotinfo[idx].gen)
+ {
+ /* The generation counter for the slot is higher than what
+ the current dtv implements. We have to update the whole
+ dtv but only those entries with a generation counter <=
+ the one for the entry we need. */
+ size_t new_gen = listp->slotinfo[idx].gen;
+ size_t total = 0;
+
+ /* We have to look through the entire dtv slotinfo list. */
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ do
+ {
+ size_t cnt;
+
+ for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
+ {
+ size_t gen = listp->slotinfo[cnt].gen;
+ struct link_map *map;
+
+ if (gen > new_gen)
+ /* This is a slot for a generation younger than
+ the one we are handling now. It might be
+ incompletely set up so ignore it. */
+ continue;
+
+ /* If the entry is older than the current dtv layout
+ we know we don't have to handle it. */
+ if (gen <= dtv[0].counter)
+ continue;
+
+ /* If there is no map this means the entry is empty. */
+ map = listp->slotinfo[cnt].map;
+ if (map == NULL)
+ {
+ /* If this modid was used at some point the memory
+ might still be allocated. */
+ if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+ {
+ free (dtv[total + cnt].pointer);
+ dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
+ }
+
+ continue;
+ }
+
+ /* Check whether the current dtv array is large enough. */
+ modid = map->l_tls_modid;
+ assert (total + cnt == modid);
+ if (dtv[-1].counter < modid)
+ {
+ /* Reallocate the dtv. */
+ dtv_t *newp;
+ size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+ size_t oldsize = dtv[-1].counter;
+
+ assert (map->l_tls_modid <= newsize);
+
+ if (dtv == GL(dl_initial_dtv))
+ {
+ /* This is the initial dtv that was allocated
+ during rtld startup using the dl-minimal.c
+ malloc instead of the real malloc. We can't
+ free it, we have to abandon the old storage. */
+
+ newp = malloc ((2 + newsize) * sizeof (dtv_t));
+ if (newp == NULL)
+ oom ();
+ memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+ }
+ else
+ {
+ newp = realloc (&dtv[-1],
+ (2 + newsize) * sizeof (dtv_t));
+ if (newp == NULL)
+ oom ();
+ }
+
+ newp[0].counter = newsize;
+
+ /* Clear the newly allocated part. */
+ memset (newp + 2 + oldsize, '\0',
+ (newsize - oldsize) * sizeof (dtv_t));
+
+ /* Point dtv to the generation counter. */
+ dtv = &newp[1];
+
+ /* Install this new dtv in the thread data
+ structures. */
+ INSTALL_NEW_DTV (dtv);
+ }
+
+ /* If there is currently memory allocated for this
+ dtv entry, free it. */
+ /* XXX Ideally we will at some point create a memory
+ pool. */
+ if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+ /* Note that free is called for NULL as well. We
+ deallocate even if it is this dtv entry we are
+ supposed to load. The reason is that we call
+ memalign and not malloc. */
+ free (dtv[modid].pointer);
+
+ /* This module is loaded dynamically. We defer
+ memory allocation. */
+ dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+ }
+
+ total += listp->len;
+ }
+ while ((listp = listp->next) != NULL);
+
+ /* This will be the new maximum generation counter. */
+ dtv[0].counter = new_gen;
+ }
+ return dtv;
+}
/* The generic dynamic and local dynamic model cannot be used in
statically linked applications. */
@@ -443,181 +604,15 @@ void *
__tls_get_addr (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
- struct link_map *the_map = NULL;
void *p;
if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
- {
- struct dtv_slotinfo_list *listp;
- size_t idx;
-
- /* The global dl_tls_dtv_slotinfo array contains for each module
- index the generation counter current when the entry was
- created. This array never shrinks so that all module indices
- which were valid at some time can be used to access it.
- Before the first use of a new module index in this function
- the array was extended appropriately. Access also does not
- have to be guarded against modifications of the array. It is
- assumed that pointer-size values can be read atomically even
- in SMP environments. It is possible that other threads at
- the same time dynamically load code and therefore add to the
- slotinfo list. This is a problem since we must not pick up
- any information about incomplete work. The solution to this
- is to ignore all dtv slots which were created after the one
- we are currently interested. We know that dynamic loading
- for this module is completed and this is the last load
- operation we know finished. */
- idx = GET_ADDR_MODULE;
- listp = GL(dl_tls_dtv_slotinfo_list);
- while (idx >= listp->len)
- {
- idx -= listp->len;
- listp = listp->next;
- }
-
- if (dtv[0].counter < listp->slotinfo[idx].gen)
- {
- /* The generation counter for the slot is higher than what
- the current dtv implements. We have to update the whole
- dtv but only those entries with a generation counter <=
- the one for the entry we need. */
- size_t new_gen = listp->slotinfo[idx].gen;
- size_t total = 0;
-
- /* We have to look through the entire dtv slotinfo list. */
- listp = GL(dl_tls_dtv_slotinfo_list);
- do
- {
- size_t cnt;
-
- for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
- {
- size_t gen = listp->slotinfo[cnt].gen;
- struct link_map *map;
- size_t modid;
-
- if (gen > new_gen)
- /* This is a slot for a generation younger than
- the one we are handling now. It might be
- incompletely set up so ignore it. */
- continue;
-
- /* If the entry is older than the current dtv layout
- we know we don't have to handle it. */
- if (gen <= dtv[0].counter)
- continue;
-
- /* If there is no map this means the entry is empty. */
- map = listp->slotinfo[cnt].map;
- if (map == NULL)
- {
- /* If this modid was used at some point the memory
- might still be allocated. */
- if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
- {
- free (dtv[total + cnt].pointer);
- dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
- }
-
- continue;
- }
-
- /* Check whether the current dtv array is large enough. */
- modid = map->l_tls_modid;
- assert (total + cnt == modid);
- if (dtv[-1].counter < modid)
- {
- /* Reallocate the dtv. */
- dtv_t *newp;
- size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
- size_t oldsize = dtv[-1].counter;
-
- assert (map->l_tls_modid <= newsize);
-
- if (dtv == GL(dl_initial_dtv))
- {
- /* This is the initial dtv that was allocated
- during rtld startup using the dl-minimal.c
- malloc instead of the real malloc. We can't
- free it, we have to abandon the old storage. */
-
- newp = malloc ((2 + newsize) * sizeof (dtv_t));
- if (newp == NULL)
- oom ();
- memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
- }
- else
- {
- newp = realloc (&dtv[-1],
- (2 + newsize) * sizeof (dtv_t));
- if (newp == NULL)
- oom ();
- }
-
- newp[0].counter = newsize;
-
- /* Clear the newly allocated part. */
- memset (newp + 2 + oldsize, '\0',
- (newsize - oldsize) * sizeof (dtv_t));
-
- /* Point dtv to the generation counter. */
- dtv = &newp[1];
-
- /* Install this new dtv in the thread data
- structures. */
- INSTALL_NEW_DTV (dtv);
- }
-
- /* If there is currently memory allocate for this
- dtv entry free it. */
- /* XXX Ideally we will at some point create a memory
- pool. */
- if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
- /* Note that free is called for NULL is well. We
- deallocate even if it is this dtv entry we are
- supposed to load. The reason is that we call
- memalign and not malloc. */
- free (dtv[modid].pointer);
-
- /* This module is loaded dynamically- We defer
- memory allocation. */
- dtv[modid].pointer = TLS_DTV_UNALLOCATED;
-
- if (modid == GET_ADDR_MODULE)
- the_map = map;
- }
-
- total += listp->len;
- }
- while ((listp = listp->next) != NULL);
-
- /* This will be the new maximum generation counter. */
- dtv[0].counter = new_gen;
- }
- }
+ dtv = tls_new_generation (dtv, GET_ADDR_MODULE);
p = dtv[GET_ADDR_MODULE].pointer;
if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
- {
- /* The allocation was deferred. Do it now. */
- if (the_map == NULL)
- {
- /* Find the link map for this module. */
- size_t idx = GET_ADDR_MODULE;
- struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
-
- while (idx >= listp->len)
- {
- idx -= listp->len;
- listp = listp->next;
- }
-
- the_map = listp->slotinfo[idx].map;
- }
-
- p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
- }
+ p = allocate_and_init (GET_ADDR_MODULE);
return (char *) p + GET_ADDR_OFFSET;
}
[-- Attachment #3: P2 --]
[-- Type: text/plain, Size: 15581 bytes --]
2002-11-10 Jakub Jelinek <jakub@redhat.com>
* sysdeps/generic/dl-tls.c (allocate_and_init): Change argument to
modid. Move part of code from __tls_get_addr here.
(tls_new_generation): New function. Moved from __tls_get_addr.
(__tls_get_addr): Move the slow path to separate functions.
Only define ifndef ARCH_TLS_GET_ADDR.
* sysdeps/i386/dl-tls.h (ARCH_TLS_GET_ADDR): Define.
Include ldsodefs.h and stddef.h.
(__tls_get_addr): Add assembly optimized ___tls_get_addr.
--- libc/sysdeps/generic/dl-tls.c.jj 2002-11-05 23:10:28.000000000 +0100
+++ libc/sysdeps/generic/dl-tls.c 2002-11-09 22:18:38.000000000 +0100
@@ -421,10 +421,22 @@ _dl_tls_symaddr (struct link_map *map, c
static void *
-allocate_and_init (struct link_map *map)
+__attribute_noinline__ __attribute_used__
+allocate_and_init (size_t modid)
{
+ struct link_map *map;
+ size_t idx = modid;
void *newp;
+ dtv_t *dtv = THREAD_DTV ();
+ struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
+ /* Find the link map for this module. */
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
+ map = listp->slotinfo[idx].map;
newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
if (newp == NULL)
oom ();
@@ -433,194 +445,179 @@ allocate_and_init (struct link_map *map)
memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+ dtv[modid].pointer = newp;
return newp;
}
+static dtv_t *
+__attribute_noinline__ __attribute_used__
+tls_new_generation (dtv_t *dtv, size_t modid)
+{
+ struct dtv_slotinfo_list *listp;
+ size_t idx;
+
+ /* The global dl_tls_dtv_slotinfo array contains for each module
+ index the generation counter current when the entry was
+ created. This array never shrinks so that all module indices
+ which were valid at some time can be used to access it.
+ Before the first use of a new module index in this function
+ the array was extended appropriately. Access also does not
+ have to be guarded against modifications of the array. It is
+ assumed that pointer-size values can be read atomically even
+ in SMP environments. It is possible that other threads at
+ the same time dynamically load code and therefore add to the
+ slotinfo list. This is a problem since we must not pick up
+ any information about incomplete work. The solution to this
+ is to ignore all dtv slots which were created after the one
+ we are currently interested in. We know that dynamic loading
+ for this module is completed and this is the last load
+ operation we know finished. */
+ idx = modid;
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ while (idx >= listp->len)
+ {
+ idx -= listp->len;
+ listp = listp->next;
+ }
+ if (dtv[0].counter < listp->slotinfo[idx].gen)
+ {
+ /* The generation counter for the slot is higher than what
+ the current dtv implements. We have to update the whole
+ dtv but only those entries with a generation counter <=
+ the one for the entry we need. */
+ size_t new_gen = listp->slotinfo[idx].gen;
+ size_t total = 0;
+
+ /* We have to look through the entire dtv slotinfo list. */
+ listp = GL(dl_tls_dtv_slotinfo_list);
+ do
+ {
+ size_t cnt;
+
+ for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
+ {
+ size_t gen = listp->slotinfo[cnt].gen;
+ struct link_map *map;
+
+ if (gen > new_gen)
+ /* This is a slot for a generation younger than
+ the one we are handling now. It might be
+ incompletely set up so ignore it. */
+ continue;
+
+ /* If the entry is older than the current dtv layout
+ we know we don't have to handle it. */
+ if (gen <= dtv[0].counter)
+ continue;
+
+ /* If there is no map this means the entry is empty. */
+ map = listp->slotinfo[cnt].map;
+ if (map == NULL)
+ {
+ /* If this modid was used at some point the memory
+ might still be allocated. */
+ if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+ {
+ free (dtv[total + cnt].pointer);
+ dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
+ }
+
+ continue;
+ }
+
+ /* Check whether the current dtv array is large enough. */
+ modid = map->l_tls_modid;
+ assert (total + cnt == modid);
+ if (dtv[-1].counter < modid)
+ {
+ /* Reallocate the dtv. */
+ dtv_t *newp;
+ size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+ size_t oldsize = dtv[-1].counter;
+
+ assert (map->l_tls_modid <= newsize);
+
+ if (dtv == GL(dl_initial_dtv))
+ {
+ /* This is the initial dtv that was allocated
+ during rtld startup using the dl-minimal.c
+ malloc instead of the real malloc. We can't
+ free it, we have to abandon the old storage. */
+
+ newp = malloc ((2 + newsize) * sizeof (dtv_t));
+ if (newp == NULL)
+ oom ();
+ memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+ }
+ else
+ {
+ newp = realloc (&dtv[-1],
+ (2 + newsize) * sizeof (dtv_t));
+ if (newp == NULL)
+ oom ();
+ }
+
+ newp[0].counter = newsize;
+
+ /* Clear the newly allocated part. */
+ memset (newp + 2 + oldsize, '\0',
+ (newsize - oldsize) * sizeof (dtv_t));
+
+ /* Point dtv to the generation counter. */
+ dtv = &newp[1];
+
+ /* Install this new dtv in the thread data
+ structures. */
+ INSTALL_NEW_DTV (dtv);
+ }
+
+ /* If there is currently memory allocated for this
+ dtv entry, free it. */
+ /* XXX Ideally we will at some point create a memory
+ pool. */
+ if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+ /* Note that free is called for NULL as well. We
+ deallocate even if it is this dtv entry we are
+ supposed to load. The reason is that we call
+ memalign and not malloc. */
+ free (dtv[modid].pointer);
+
+ /* This module is loaded dynamically. We defer
+ memory allocation. */
+ dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+ }
+
+ total += listp->len;
+ }
+ while ((listp = listp->next) != NULL);
+
+ /* This will be the new maximum generation counter. */
+ dtv[0].counter = new_gen;
+ }
+ return dtv;
+}
+
+# ifndef ARCH_TLS_GET_ADDR
/* The generic dynamic and local dynamic model cannot be used in
statically linked applications. */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
- struct link_map *the_map = NULL;
void *p;
if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
- {
- struct dtv_slotinfo_list *listp;
- size_t idx;
-
- /* The global dl_tls_dtv_slotinfo array contains for each module
- index the generation counter current when the entry was
- created. This array never shrinks so that all module indices
- which were valid at some time can be used to access it.
- Before the first use of a new module index in this function
- the array was extended appropriately. Access also does not
- have to be guarded against modifications of the array. It is
- assumed that pointer-size values can be read atomically even
- in SMP environments. It is possible that other threads at
- the same time dynamically load code and therefore add to the
- slotinfo list. This is a problem since we must not pick up
- any information about incomplete work. The solution to this
- is to ignore all dtv slots which were created after the one
- we are currently interested. We know that dynamic loading
- for this module is completed and this is the last load
- operation we know finished. */
- idx = GET_ADDR_MODULE;
- listp = GL(dl_tls_dtv_slotinfo_list);
- while (idx >= listp->len)
- {
- idx -= listp->len;
- listp = listp->next;
- }
-
- if (dtv[0].counter < listp->slotinfo[idx].gen)
- {
- /* The generation counter for the slot is higher than what
- the current dtv implements. We have to update the whole
- dtv but only those entries with a generation counter <=
- the one for the entry we need. */
- size_t new_gen = listp->slotinfo[idx].gen;
- size_t total = 0;
-
- /* We have to look through the entire dtv slotinfo list. */
- listp = GL(dl_tls_dtv_slotinfo_list);
- do
- {
- size_t cnt;
-
- for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
- {
- size_t gen = listp->slotinfo[cnt].gen;
- struct link_map *map;
- size_t modid;
-
- if (gen > new_gen)
- /* This is a slot for a generation younger than
- the one we are handling now. It might be
- incompletely set up so ignore it. */
- continue;
-
- /* If the entry is older than the current dtv layout
- we know we don't have to handle it. */
- if (gen <= dtv[0].counter)
- continue;
-
- /* If there is no map this means the entry is empty. */
- map = listp->slotinfo[cnt].map;
- if (map == NULL)
- {
- /* If this modid was used at some point the memory
- might still be allocated. */
- if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
- {
- free (dtv[total + cnt].pointer);
- dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
- }
-
- continue;
- }
-
- /* Check whether the current dtv array is large enough. */
- modid = map->l_tls_modid;
- assert (total + cnt == modid);
- if (dtv[-1].counter < modid)
- {
- /* Reallocate the dtv. */
- dtv_t *newp;
- size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
- size_t oldsize = dtv[-1].counter;
-
- assert (map->l_tls_modid <= newsize);
-
- if (dtv == GL(dl_initial_dtv))
- {
- /* This is the initial dtv that was allocated
- during rtld startup using the dl-minimal.c
- malloc instead of the real malloc. We can't
- free it, we have to abandon the old storage. */
-
- newp = malloc ((2 + newsize) * sizeof (dtv_t));
- if (newp == NULL)
- oom ();
- memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
- }
- else
- {
- newp = realloc (&dtv[-1],
- (2 + newsize) * sizeof (dtv_t));
- if (newp == NULL)
- oom ();
- }
-
- newp[0].counter = newsize;
-
- /* Clear the newly allocated part. */
- memset (newp + 2 + oldsize, '\0',
- (newsize - oldsize) * sizeof (dtv_t));
-
- /* Point dtv to the generation counter. */
- dtv = &newp[1];
-
- /* Install this new dtv in the thread data
- structures. */
- INSTALL_NEW_DTV (dtv);
- }
-
- /* If there is currently memory allocate for this
- dtv entry free it. */
- /* XXX Ideally we will at some point create a memory
- pool. */
- if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
- /* Note that free is called for NULL is well. We
- deallocate even if it is this dtv entry we are
- supposed to load. The reason is that we call
- memalign and not malloc. */
- free (dtv[modid].pointer);
-
- /* This module is loaded dynamically- We defer
- memory allocation. */
- dtv[modid].pointer = TLS_DTV_UNALLOCATED;
-
- if (modid == GET_ADDR_MODULE)
- the_map = map;
- }
-
- total += listp->len;
- }
- while ((listp = listp->next) != NULL);
-
- /* This will be the new maximum generation counter. */
- dtv[0].counter = new_gen;
- }
- }
+ dtv = tls_new_generation (dtv, GET_ADDR_MODULE);
p = dtv[GET_ADDR_MODULE].pointer;
if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
- {
- /* The allocation was deferred. Do it now. */
- if (the_map == NULL)
- {
- /* Find the link map for this module. */
- size_t idx = GET_ADDR_MODULE;
- struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
-
- while (idx >= listp->len)
- {
- idx -= listp->len;
- listp = listp->next;
- }
-
- the_map = listp->slotinfo[idx].map;
- }
-
- p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
- }
+ p = allocate_and_init (GET_ADDR_MODULE);
return (char *) p + GET_ADDR_OFFSET;
}
+# endif
# endif
#endif /* use TLS */
--- libc/sysdeps/i386/dl-tls.h.jj 2002-10-11 16:24:51.000000000 +0200
+++ libc/sysdeps/i386/dl-tls.h 2002-11-09 23:14:28.000000000 +0100
@@ -32,6 +32,9 @@ extern void *___tls_get_addr (tls_index
__attribute__ ((__regparm__ (1)));
extern void *___tls_get_addr_internal (tls_index *ti)
__attribute__ ((__regparm__ (1))) attribute_hidden;
+# define ARCH_TLS_GET_ADDR 1
+# include <stddef.h>
+# include <ldsodefs.h>
/* The special thing about the x86 TLS ABI is that we have two
variants of the __tls_get_addr function with different calling
@@ -42,6 +45,55 @@ extern void *___tls_get_addr_internal (t
void *
__tls_get_addr (tls_index *ti)
{
+# ifdef ARCH_TLS_GET_ADDR
+ asm volatile (
+" .section .gnu.linkonce.t.__i686.get_pc_thunk.cx,\"ax\",@progbits\n"
+" .globl __i686.get_pc_thunk.cx \n"
+" .hidden __i686.get_pc_thunk.cx \n"
+" .type __i686.get_pc_thunk.cx,@function \n"
+"__i686.get_pc_thunk.cx: \n"
+" movl (%%esp), %%ecx \n"
+" ret \n"
+" .previous \n"
+" \n"
+" .subsection 1 \n"
+" .globl ___tls_get_addr, ___tls_get_addr_internal \n"
+" .hidden ___tls_get_addr_internal \n"
+" .type ___tls_get_addr,@function \n"
+"___tls_get_addr: \n"
+"___tls_get_addr_internal: \n"
+" call __i686.get_pc_thunk.cx \n"
+" addl $_GLOBAL_OFFSET_TABLE_, %%ecx \n"
+" movl %%gs:0x4, %%edx \n"
+" movl %P0+_rtld_local@GOTOFF(%%ecx), %%ecx \n"
+" cmpl %%ecx, (%%edx) \n"
+" jne 4f \n"
+"1: movl (%%eax), %%ecx \n"
+" movl (%%edx,%%ecx,4), %%edx \n"
+" cmpl $-1, %%edx \n"
+" je 3f \n"
+"2: movl 0x4(%%eax), %%eax \n"
+" addl %%edx, %%eax \n"
+" ret \n"
+"3: pushl %%eax \n"
+" pushl %%ecx \n"
+" call allocate_and_init \n"
+" movl %%eax, %%edx \n"
+" popl %%ecx \n"
+" popl %%eax \n"
+" jmp 2b \n"
+"4: pushl %%eax \n"
+" pushl (%%eax) \n"
+" pushl %%edx \n"
+" call tls_new_generation \n"
+" popl %%edx \n"
+" popl %%ecx \n"
+" movl %%eax, %%edx \n"
+" popl %%eax \n"
+" jmp 1b \n"
+" .previous \n"
+ : : "i" (offsetof (struct rtld_global, _dl_tls_generation)));
+# endif
return ___tls_get_addr_internal (ti);
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Speed up ___tls_get_addr
2002-11-10 23:37 [PATCH] Speed up ___tls_get_addr Jakub Jelinek
@ 2002-11-11 0:48 ` Roland McGrath
0 siblings, 0 replies; 2+ messages in thread
From: Roland McGrath @ 2002-11-11 0:48 UTC (permalink / raw)
To: Jakub Jelinek; +Cc: Glibc hackers
I think it is fine to put this in now, the dl-tls.c change that is. (The
asm I would rather hold off on for now.) But can you try internal_function
(i.e. regparm) on the new functions and see if that helps more? Also the
function definitions should have comments mentioning why they should not be
inlined, etc.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2002-11-11 8:48 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-10 23:37 [PATCH] Speed up ___tls_get_addr Jakub Jelinek
2002-11-11 0:48 ` Roland McGrath
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).