public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Speed up ___tls_get_addr
@ 2002-11-10 23:37 Jakub Jelinek
  2002-11-11  0:48 ` Roland McGrath
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2002-11-10 23:37 UTC (permalink / raw)
  To: Glibc hackers

[-- Attachment #1: Type: text/plain, Size: 78 bytes --]

Hi!

Sending here just to get it archived.
Let's get back to it later.

	Jakub

[-- Attachment #2: P1 --]
[-- Type: text/plain, Size: 13086 bytes --]

2002-11-10  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/generic/dl-tls.c (allocate_and_init): Change argument to
	modid.  Move part of code from __tls_get_addr here.
	(tls_new_generation): New function.  Moved from __tls_get_addr.
	(__tls_get_addr): Move the slow path to separate functions.

--- libc/sysdeps/generic/dl-tls.c.jj	2002-11-05 23:10:28.000000000 +0100
+++ libc/sysdeps/generic/dl-tls.c	2002-11-09 22:17:31.000000000 +0100
@@ -421,10 +421,22 @@ _dl_tls_symaddr (struct link_map *map, c
 
 
 static void *
-allocate_and_init (struct link_map *map)
+__attribute_noinline__ __attribute_used__
+allocate_and_init (size_t modid)
 {
+  struct link_map *map;
+  size_t idx = modid;
   void *newp;
+  dtv_t *dtv = THREAD_DTV ();
+  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
 
+  /* Find the link map for this module.  */
+  while (idx >= listp->len)
+    {
+      idx -= listp->len;
+      listp = listp->next;
+    }
+  map = listp->slotinfo[idx].map;
   newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
   if (newp == NULL)
     oom ();
@@ -433,9 +445,158 @@ allocate_and_init (struct link_map *map)
   memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
+  dtv[modid].pointer = newp;
   return newp;
 }
 
+static dtv_t *
+__attribute_noinline__ __attribute_used__
+tls_new_generation (dtv_t *dtv, size_t modid)
+{
+  struct dtv_slotinfo_list *listp;
+  size_t idx;
+
+  /* The global dl_tls_dtv_slotinfo array contains for each module
+     index the generation counter current when the entry was
+     created.  This array never shrinks so that all module indices
+     which were valid at some time can be used to access it.
+     Before the first use of a new module index in this function
+     the array was extended appropriately.  Access also does not
+     have to be guarded against modifications of the array.  It is
+     assumed that pointer-size values can be read atomically even
+     in SMP environments.  It is possible that other threads at
+     the same time dynamically load code and therefore add to the
+     slotinfo list.  This is a problem since we must not pick up
+     any information about incomplete work.  The solution to this
+     is to ignore all dtv slots which were created after the one
+     we are currently interested.  We know that dynamic loading
+     for this module is completed and this is the last load
+     operation we know finished.  */
+  idx = modid;
+  listp = GL(dl_tls_dtv_slotinfo_list);
+  while (idx >= listp->len)
+    {
+      idx -= listp->len;
+      listp = listp->next;
+    }
+
+  if (dtv[0].counter < listp->slotinfo[idx].gen)
+    {
+      /* The generation counter for the slot is higher than what
+         the current dtv implements.  We have to update the whole
+         dtv but only those entries with a generation counter <=
+         the one for the entry we need.  */
+      size_t new_gen = listp->slotinfo[idx].gen;
+      size_t total = 0;
+
+      /* We have to look through the entire dtv slotinfo list.  */
+      listp =  GL(dl_tls_dtv_slotinfo_list);
+      do
+        {
+          size_t cnt;
+
+          for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
+            {
+              size_t gen = listp->slotinfo[cnt].gen;
+              struct link_map *map;
+
+              if (gen > new_gen)
+                /* This is a slot for a generation younger than
+                   the one we are handling now.  It might be
+                   incompletely set up so ignore it.  */
+                continue;
+
+              /* If the entry is older than the current dtv layout
+                 we know we don't have to handle it.  */
+              if (gen <= dtv[0].counter)
+                continue;
+
+              /* If there is no map this means the entry is empty.  */
+              map = listp->slotinfo[cnt].map;
+              if (map == NULL)
+                {
+                  /* If this modid was used at some point the memory
+                     might still be allocated.  */
+                  if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+                    {
+                      free (dtv[total + cnt].pointer);
+                      dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
+                    }
+
+                  continue;
+                }
+
+              /* Check whether the current dtv array is large enough.  */
+              modid = map->l_tls_modid;
+              assert (total + cnt == modid);
+              if (dtv[-1].counter < modid)
+                {
+                  /* Reallocate the dtv.  */
+                  dtv_t *newp;
+                  size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+                  size_t oldsize = dtv[-1].counter;
+
+                  assert (map->l_tls_modid <= newsize);
+
+                  if (dtv == GL(dl_initial_dtv))
+                    {
+                      /* This is the initial dtv that was allocated
+                         during rtld startup using the dl-minimal.c
+                         malloc instead of the real malloc.  We can't
+                         free it, we have to abandon the old storage.  */
+
+                      newp = malloc ((2 + newsize) * sizeof (dtv_t));
+                      if (newp == NULL)
+                        oom ();
+                      memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+                    }
+                  else
+                    {
+                      newp = realloc (&dtv[-1],
+                                      (2 + newsize) * sizeof (dtv_t));
+                      if (newp == NULL)
+                        oom ();
+                    }
+
+                  newp[0].counter = newsize;
+
+                  /* Clear the newly allocated part.  */
+                  memset (newp + 2 + oldsize, '\0',
+                          (newsize - oldsize) * sizeof (dtv_t));
+
+                  /* Point dtv to the generation counter.  */
+                  dtv = &newp[1];
+
+                  /* Install this new dtv in the thread data
+                     structures.  */
+                  INSTALL_NEW_DTV (dtv);
+                }
+
+              /* If there is currently memory allocate for this
+                 dtv entry free it.  */
+              /* XXX Ideally we will at some point create a memory
+                 pool.  */
+              if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+                /* Note that free is called for NULL is well.  We
+                   deallocate even if it is this dtv entry we are
+                   supposed to load.  The reason is that we call
+                   memalign and not malloc.  */
+                free (dtv[modid].pointer);
+
+              /* This module is loaded dynamically- We defer
+                 memory allocation.  */
+              dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+            }
+
+          total += listp->len;
+        }
+      while ((listp = listp->next) != NULL);
+
+      /* This will be the new maximum generation counter.  */
+      dtv[0].counter = new_gen;
+    }
+  return dtv;
+}
 
 /* The generic dynamic and local dynamic model cannot be used in
    statically linked applications.  */
@@ -443,181 +604,15 @@ void *
 __tls_get_addr (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();
-  struct link_map *the_map = NULL;
   void *p;
 
   if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
-    {
-      struct dtv_slotinfo_list *listp;
-      size_t idx;
-
-      /* The global dl_tls_dtv_slotinfo array contains for each module
-	 index the generation counter current when the entry was
-	 created.  This array never shrinks so that all module indices
-	 which were valid at some time can be used to access it.
-	 Before the first use of a new module index in this function
-	 the array was extended appropriately.  Access also does not
-	 have to be guarded against modifications of the array.  It is
-	 assumed that pointer-size values can be read atomically even
-	 in SMP environments.  It is possible that other threads at
-	 the same time dynamically load code and therefore add to the
-	 slotinfo list.  This is a problem since we must not pick up
-	 any information about incomplete work.  The solution to this
-	 is to ignore all dtv slots which were created after the one
-	 we are currently interested.  We know that dynamic loading
-	 for this module is completed and this is the last load
-	 operation we know finished.  */
-      idx = GET_ADDR_MODULE;
-      listp = GL(dl_tls_dtv_slotinfo_list);
-      while (idx >= listp->len)
-	{
-	  idx -= listp->len;
-	  listp = listp->next;
-	}
-
-      if (dtv[0].counter < listp->slotinfo[idx].gen)
-	{
-	  /* The generation counter for the slot is higher than what
-	     the current dtv implements.  We have to update the whole
-	     dtv but only those entries with a generation counter <=
-	     the one for the entry we need.  */
-	  size_t new_gen = listp->slotinfo[idx].gen;
-	  size_t total = 0;
-
-	  /* We have to look through the entire dtv slotinfo list.  */
-	  listp =  GL(dl_tls_dtv_slotinfo_list);
-	  do
-	    {
-	      size_t cnt;
-
-	      for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
-		{
-		  size_t gen = listp->slotinfo[cnt].gen;
-		  struct link_map *map;
-		  size_t modid;
-
-		  if (gen > new_gen)
-		    /* This is a slot for a generation younger than
-		       the one we are handling now.  It might be
-		       incompletely set up so ignore it.  */
-		    continue;
-
-		  /* If the entry is older than the current dtv layout
-		     we know we don't have to handle it.  */
-		  if (gen <= dtv[0].counter)
-		    continue;
-
-		  /* If there is no map this means the entry is empty.  */
-		  map = listp->slotinfo[cnt].map;
-		  if (map == NULL)
-		    {
-		      /* If this modid was used at some point the memory
-			 might still be allocated.  */
-		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
-			{
-			  free (dtv[total + cnt].pointer);
-			  dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
-			}
-
-		      continue;
-		    }
-
-		  /* Check whether the current dtv array is large enough.  */
-		  modid = map->l_tls_modid;
-		  assert (total + cnt == modid);
-		  if (dtv[-1].counter < modid)
-		    {
-		      /* Reallocate the dtv.  */
-		      dtv_t *newp;
-		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
-		      size_t oldsize = dtv[-1].counter;
-
-		      assert (map->l_tls_modid <= newsize);
-
-		      if (dtv == GL(dl_initial_dtv))
-			{
-			  /* This is the initial dtv that was allocated
-			     during rtld startup using the dl-minimal.c
-			     malloc instead of the real malloc.  We can't
-			     free it, we have to abandon the old storage.  */
-
-			  newp = malloc ((2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			  memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
-			}
-		      else
-			{
-			  newp = realloc (&dtv[-1],
-					  (2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			}
-
-		      newp[0].counter = newsize;
-
-		      /* Clear the newly allocated part.  */
-		      memset (newp + 2 + oldsize, '\0',
-			      (newsize - oldsize) * sizeof (dtv_t));
-
-		      /* Point dtv to the generation counter.  */
-		      dtv = &newp[1];
-
-		      /* Install this new dtv in the thread data
-			 structures.  */
-		      INSTALL_NEW_DTV (dtv);
-		    }
-
-		  /* If there is currently memory allocate for this
-		     dtv entry free it.  */
-		  /* XXX Ideally we will at some point create a memory
-		     pool.  */
-		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
-		    /* Note that free is called for NULL is well.  We
-		       deallocate even if it is this dtv entry we are
-		       supposed to load.  The reason is that we call
-		       memalign and not malloc.  */
-		    free (dtv[modid].pointer);
-
-		  /* This module is loaded dynamically- We defer
-		     memory allocation.  */
-		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
-
-		  if (modid == GET_ADDR_MODULE)
-		    the_map = map;
-		}
-
-	      total += listp->len;
-	    }
-	  while ((listp = listp->next) != NULL);
-
-	  /* This will be the new maximum generation counter.  */
-	  dtv[0].counter = new_gen;
-	}
-    }
+    dtv = tls_new_generation (dtv, GET_ADDR_MODULE);
 
   p = dtv[GET_ADDR_MODULE].pointer;
 
   if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
-    {
-      /* The allocation was deferred.  Do it now.  */
-      if (the_map == NULL)
-	{
-	  /* Find the link map for this module.  */
-	  size_t idx = GET_ADDR_MODULE;
-	  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
-
-	  while (idx >= listp->len)
-	    {
-	      idx -= listp->len;
-	      listp = listp->next;
-	    }
-
-	  the_map = listp->slotinfo[idx].map;
-	}
-
-      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
-    }
+    p = allocate_and_init (GET_ADDR_MODULE);
 
   return (char *) p + GET_ADDR_OFFSET;
 }

[-- Attachment #3: P2 --]
[-- Type: text/plain, Size: 15581 bytes --]

2002-11-10  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/generic/dl-tls.c (allocate_and_init): Change argument to
	modid.  Move part of code from __tls_get_addr here.
	(tls_new_generation): New function.  Moved from __tls_get_addr.
	(__tls_get_addr): Move the slow path to separate functions.
	Only define ifndef ARCH_TLS_GET_ADDR.
	* sysdeps/i386/dl-tls.h (ARCH_TLS_GET_ADDR): Define.
	Include ldsodefs.h and stddef.h.
	(__tls_get_addr): Add assembly optimized ___tls_get_addr.

--- libc/sysdeps/generic/dl-tls.c.jj	2002-11-05 23:10:28.000000000 +0100
+++ libc/sysdeps/generic/dl-tls.c	2002-11-09 22:18:38.000000000 +0100
@@ -421,10 +421,22 @@ _dl_tls_symaddr (struct link_map *map, c
 
 
 static void *
-allocate_and_init (struct link_map *map)
+__attribute_noinline__ __attribute_used__
+allocate_and_init (size_t modid)
 {
+  struct link_map *map;
+  size_t idx = modid;
   void *newp;
+  dtv_t *dtv = THREAD_DTV ();
+  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
 
+  /* Find the link map for this module.  */
+  while (idx >= listp->len)
+    {
+      idx -= listp->len;
+      listp = listp->next;
+    }
+  map = listp->slotinfo[idx].map;
   newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
   if (newp == NULL)
     oom ();
@@ -433,194 +445,179 @@ allocate_and_init (struct link_map *map)
   memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
+  dtv[modid].pointer = newp;
   return newp;
 }
 
+static dtv_t *
+__attribute_noinline__ __attribute_used__
+tls_new_generation (dtv_t *dtv, size_t modid)
+{
+  struct dtv_slotinfo_list *listp;
+  size_t idx;
+
+  /* The global dl_tls_dtv_slotinfo array contains for each module
+     index the generation counter current when the entry was
+     created.  This array never shrinks so that all module indices
+     which were valid at some time can be used to access it.
+     Before the first use of a new module index in this function
+     the array was extended appropriately.  Access also does not
+     have to be guarded against modifications of the array.  It is
+     assumed that pointer-size values can be read atomically even
+     in SMP environments.  It is possible that other threads at
+     the same time dynamically load code and therefore add to the
+     slotinfo list.  This is a problem since we must not pick up
+     any information about incomplete work.  The solution to this
+     is to ignore all dtv slots which were created after the one
+     we are currently interested.  We know that dynamic loading
+     for this module is completed and this is the last load
+     operation we know finished.  */
+  idx = modid;
+  listp = GL(dl_tls_dtv_slotinfo_list);
+  while (idx >= listp->len)
+    {
+      idx -= listp->len;
+      listp = listp->next;
+    }
 
+  if (dtv[0].counter < listp->slotinfo[idx].gen)
+    {
+      /* The generation counter for the slot is higher than what
+         the current dtv implements.  We have to update the whole
+         dtv but only those entries with a generation counter <=
+         the one for the entry we need.  */
+      size_t new_gen = listp->slotinfo[idx].gen;
+      size_t total = 0;
+
+      /* We have to look through the entire dtv slotinfo list.  */
+      listp =  GL(dl_tls_dtv_slotinfo_list);
+      do
+        {
+          size_t cnt;
+
+          for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
+            {
+              size_t gen = listp->slotinfo[cnt].gen;
+              struct link_map *map;
+
+              if (gen > new_gen)
+                /* This is a slot for a generation younger than
+                   the one we are handling now.  It might be
+                   incompletely set up so ignore it.  */
+                continue;
+
+              /* If the entry is older than the current dtv layout
+                 we know we don't have to handle it.  */
+              if (gen <= dtv[0].counter)
+                continue;
+
+              /* If there is no map this means the entry is empty.  */
+              map = listp->slotinfo[cnt].map;
+              if (map == NULL)
+                {
+                  /* If this modid was used at some point the memory
+                     might still be allocated.  */
+                  if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+                    {
+                      free (dtv[total + cnt].pointer);
+                      dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
+                    }
+
+                  continue;
+                }
+
+              /* Check whether the current dtv array is large enough.  */
+              modid = map->l_tls_modid;
+              assert (total + cnt == modid);
+              if (dtv[-1].counter < modid)
+                {
+                  /* Reallocate the dtv.  */
+                  dtv_t *newp;
+                  size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+                  size_t oldsize = dtv[-1].counter;
+
+                  assert (map->l_tls_modid <= newsize);
+
+                  if (dtv == GL(dl_initial_dtv))
+                    {
+                      /* This is the initial dtv that was allocated
+                         during rtld startup using the dl-minimal.c
+                         malloc instead of the real malloc.  We can't
+                         free it, we have to abandon the old storage.  */
+
+                      newp = malloc ((2 + newsize) * sizeof (dtv_t));
+                      if (newp == NULL)
+                        oom ();
+                      memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+                    }
+                  else
+                    {
+                      newp = realloc (&dtv[-1],
+                                      (2 + newsize) * sizeof (dtv_t));
+                      if (newp == NULL)
+                        oom ();
+                    }
+
+                  newp[0].counter = newsize;
+
+                  /* Clear the newly allocated part.  */
+                  memset (newp + 2 + oldsize, '\0',
+                          (newsize - oldsize) * sizeof (dtv_t));
+
+                  /* Point dtv to the generation counter.  */
+                  dtv = &newp[1];
+
+                  /* Install this new dtv in the thread data
+                     structures.  */
+                  INSTALL_NEW_DTV (dtv);
+                }
+
+              /* If there is currently memory allocate for this
+                 dtv entry free it.  */
+              /* XXX Ideally we will at some point create a memory
+                 pool.  */
+              if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+                /* Note that free is called for NULL is well.  We
+                   deallocate even if it is this dtv entry we are
+                   supposed to load.  The reason is that we call
+                   memalign and not malloc.  */
+                free (dtv[modid].pointer);
+
+              /* This module is loaded dynamically- We defer
+                 memory allocation.  */
+              dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+            }
+
+          total += listp->len;
+        }
+      while ((listp = listp->next) != NULL);
+
+      /* This will be the new maximum generation counter.  */
+      dtv[0].counter = new_gen;
+    }
+  return dtv;
+}
+
+#  ifndef ARCH_TLS_GET_ADDR
 /* The generic dynamic and local dynamic model cannot be used in
    statically linked applications.  */
 void *
 __tls_get_addr (GET_ADDR_ARGS)
 {
   dtv_t *dtv = THREAD_DTV ();
-  struct link_map *the_map = NULL;
   void *p;
 
   if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
-    {
-      struct dtv_slotinfo_list *listp;
-      size_t idx;
-
-      /* The global dl_tls_dtv_slotinfo array contains for each module
-	 index the generation counter current when the entry was
-	 created.  This array never shrinks so that all module indices
-	 which were valid at some time can be used to access it.
-	 Before the first use of a new module index in this function
-	 the array was extended appropriately.  Access also does not
-	 have to be guarded against modifications of the array.  It is
-	 assumed that pointer-size values can be read atomically even
-	 in SMP environments.  It is possible that other threads at
-	 the same time dynamically load code and therefore add to the
-	 slotinfo list.  This is a problem since we must not pick up
-	 any information about incomplete work.  The solution to this
-	 is to ignore all dtv slots which were created after the one
-	 we are currently interested.  We know that dynamic loading
-	 for this module is completed and this is the last load
-	 operation we know finished.  */
-      idx = GET_ADDR_MODULE;
-      listp = GL(dl_tls_dtv_slotinfo_list);
-      while (idx >= listp->len)
-	{
-	  idx -= listp->len;
-	  listp = listp->next;
-	}
-
-      if (dtv[0].counter < listp->slotinfo[idx].gen)
-	{
-	  /* The generation counter for the slot is higher than what
-	     the current dtv implements.  We have to update the whole
-	     dtv but only those entries with a generation counter <=
-	     the one for the entry we need.  */
-	  size_t new_gen = listp->slotinfo[idx].gen;
-	  size_t total = 0;
-
-	  /* We have to look through the entire dtv slotinfo list.  */
-	  listp =  GL(dl_tls_dtv_slotinfo_list);
-	  do
-	    {
-	      size_t cnt;
-
-	      for (cnt = total = 0 ? 1 : 0; cnt < listp->len; ++cnt)
-		{
-		  size_t gen = listp->slotinfo[cnt].gen;
-		  struct link_map *map;
-		  size_t modid;
-
-		  if (gen > new_gen)
-		    /* This is a slot for a generation younger than
-		       the one we are handling now.  It might be
-		       incompletely set up so ignore it.  */
-		    continue;
-
-		  /* If the entry is older than the current dtv layout
-		     we know we don't have to handle it.  */
-		  if (gen <= dtv[0].counter)
-		    continue;
-
-		  /* If there is no map this means the entry is empty.  */
-		  map = listp->slotinfo[cnt].map;
-		  if (map == NULL)
-		    {
-		      /* If this modid was used at some point the memory
-			 might still be allocated.  */
-		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
-			{
-			  free (dtv[total + cnt].pointer);
-			  dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
-			}
-
-		      continue;
-		    }
-
-		  /* Check whether the current dtv array is large enough.  */
-		  modid = map->l_tls_modid;
-		  assert (total + cnt == modid);
-		  if (dtv[-1].counter < modid)
-		    {
-		      /* Reallocate the dtv.  */
-		      dtv_t *newp;
-		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
-		      size_t oldsize = dtv[-1].counter;
-
-		      assert (map->l_tls_modid <= newsize);
-
-		      if (dtv == GL(dl_initial_dtv))
-			{
-			  /* This is the initial dtv that was allocated
-			     during rtld startup using the dl-minimal.c
-			     malloc instead of the real malloc.  We can't
-			     free it, we have to abandon the old storage.  */
-
-			  newp = malloc ((2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			  memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
-			}
-		      else
-			{
-			  newp = realloc (&dtv[-1],
-					  (2 + newsize) * sizeof (dtv_t));
-			  if (newp == NULL)
-			    oom ();
-			}
-
-		      newp[0].counter = newsize;
-
-		      /* Clear the newly allocated part.  */
-		      memset (newp + 2 + oldsize, '\0',
-			      (newsize - oldsize) * sizeof (dtv_t));
-
-		      /* Point dtv to the generation counter.  */
-		      dtv = &newp[1];
-
-		      /* Install this new dtv in the thread data
-			 structures.  */
-		      INSTALL_NEW_DTV (dtv);
-		    }
-
-		  /* If there is currently memory allocate for this
-		     dtv entry free it.  */
-		  /* XXX Ideally we will at some point create a memory
-		     pool.  */
-		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
-		    /* Note that free is called for NULL is well.  We
-		       deallocate even if it is this dtv entry we are
-		       supposed to load.  The reason is that we call
-		       memalign and not malloc.  */
-		    free (dtv[modid].pointer);
-
-		  /* This module is loaded dynamically- We defer
-		     memory allocation.  */
-		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
-
-		  if (modid == GET_ADDR_MODULE)
-		    the_map = map;
-		}
-
-	      total += listp->len;
-	    }
-	  while ((listp = listp->next) != NULL);
-
-	  /* This will be the new maximum generation counter.  */
-	  dtv[0].counter = new_gen;
-	}
-    }
+    dtv = tls_new_generation (dtv, GET_ADDR_MODULE);
 
   p = dtv[GET_ADDR_MODULE].pointer;
 
   if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
-    {
-      /* The allocation was deferred.  Do it now.  */
-      if (the_map == NULL)
-	{
-	  /* Find the link map for this module.  */
-	  size_t idx = GET_ADDR_MODULE;
-	  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
-
-	  while (idx >= listp->len)
-	    {
-	      idx -= listp->len;
-	      listp = listp->next;
-	    }
-
-	  the_map = listp->slotinfo[idx].map;
-	}
-
-      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
-    }
+    p = allocate_and_init (GET_ADDR_MODULE);
 
   return (char *) p + GET_ADDR_OFFSET;
 }
+#  endif
 # endif
 
 #endif	/* use TLS */
--- libc/sysdeps/i386/dl-tls.h.jj	2002-10-11 16:24:51.000000000 +0200
+++ libc/sysdeps/i386/dl-tls.h	2002-11-09 23:14:28.000000000 +0100
@@ -32,6 +32,9 @@ extern void *___tls_get_addr (tls_index 
      __attribute__ ((__regparm__ (1)));
 extern void *___tls_get_addr_internal (tls_index *ti)
      __attribute__ ((__regparm__ (1))) attribute_hidden;
+# define ARCH_TLS_GET_ADDR 1
+# include <stddef.h>
+# include <ldsodefs.h>
 
 /* The special thing about the x86 TLS ABI is that we have two
    variants of the __tls_get_addr function with different calling
@@ -42,6 +45,55 @@ extern void *___tls_get_addr_internal (t
 void *
 __tls_get_addr (tls_index *ti)
 {
+# ifdef ARCH_TLS_GET_ADDR
+  asm volatile (
+"	.section .gnu.linkonce.t.__i686.get_pc_thunk.cx,\"ax\",@progbits\n"
+"	.globl	__i686.get_pc_thunk.cx					\n"
+"	.hidden	__i686.get_pc_thunk.cx					\n"
+"	.type	__i686.get_pc_thunk.cx,@function			\n"
+"__i686.get_pc_thunk.cx:						\n"
+"	movl	(%%esp), %%ecx						\n"
+"	ret								\n"
+"	.previous							\n"
+"									\n"
+"	.subsection	1						\n"
+"	.globl	___tls_get_addr, ___tls_get_addr_internal		\n"
+"	.hidden	___tls_get_addr_internal				\n"
+"	.type	___tls_get_addr,@function				\n"
+"___tls_get_addr:							\n"
+"___tls_get_addr_internal:						\n"
+"	call	__i686.get_pc_thunk.cx					\n"
+"	addl	$_GLOBAL_OFFSET_TABLE_, %%ecx				\n"
+"	movl	%%gs:0x4, %%edx						\n"
+"	movl	%P0+_rtld_local@GOTOFF(%%ecx), %%ecx			\n"
+"	cmpl	%%ecx, (%%edx)						\n"
+"	jne	4f							\n"
+"1:	movl	(%%eax), %%ecx						\n"
+"	movl	(%%edx,%%ecx,4), %%edx					\n"
+"	cmpl    $-1, %%edx						\n"
+"	je	3f							\n"
+"2:	movl	0x4(%%eax), %%eax					\n"
+"	addl	%%edx, %%eax						\n"
+"	ret								\n"
+"3:	pushl	%%eax							\n"
+"	pushl	%%ecx							\n"
+"	call	allocate_and_init					\n"
+"	movl	%%eax,  %%edx						\n"
+"	popl	%%ecx							\n"
+"	popl	%%eax							\n"
+"	jmp	2b							\n"
+"4:	pushl	%%eax							\n"
+"	pushl	(%%eax)							\n"
+"	pushl	%%edx							\n"
+"	call	tls_new_generation					\n"
+"	popl	%%edx							\n"
+"	popl	%%ecx							\n"
+"	movl	%%eax, %%edx						\n"
+"	popl	%%eax							\n"
+"	jmp	1b							\n"
+"	.previous							\n"
+  : : "i" (offsetof (struct rtld_global, _dl_tls_generation)));
+# endif
   return ___tls_get_addr_internal (ti);
 }
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Speed up ___tls_get_addr
  2002-11-10 23:37 [PATCH] Speed up ___tls_get_addr Jakub Jelinek
@ 2002-11-11  0:48 ` Roland McGrath
  0 siblings, 0 replies; 2+ messages in thread
From: Roland McGrath @ 2002-11-11  0:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Glibc hackers

I think it is fine to put this in now, the dl-tls.c change that is.  (The
asm I would rather hold off on for now.)  But can you try internal_function
(i.e. regparm) on the new functions and see if that helps more?  Also the
function definitions should have comments mentioning why they should not be
inlined, etc.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2002-11-11  8:48 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-11-10 23:37 [PATCH] Speed up ___tls_get_addr Jakub Jelinek
2002-11-11  0:48 ` Roland McGrath

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).