public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* RFC: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in get_dyn_sym_info)
@ 2006-03-31  2:18 H. J. Lu
  2006-03-31 16:35 ` H. J. Lu
  0 siblings, 1 reply; 9+ messages in thread
From: H. J. Lu @ 2006-03-31  2:18 UTC (permalink / raw)
  To: binutils; +Cc: wilson

The ia64 linker uses a linked list to keep track relocations with
different addends. When we have 100,000 relocations against the
same symbols with different addends, the linked list becomes extremely
slow. This patch uses an array to replace the linked list. I have used
the new linker to rebuild binutils 2 times. Theare are no regressions.

It cuts down the link time from several hours to about 100 seconds.


H.J.
----
2006-03-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR ld/2442
	* elfxx-ia64.c (elfNN_ia64_dyn_sym_info): Remove next.
	(elfNN_ia64_local_hash_entry): Add count, sorted_count and
	size.
	(elfNN_ia64_link_hash_entry): Likewise.
	(elfNN_ia64_new_elf_hash_entry): Initialize count, sorted_count
	and size.
	(elfNN_ia64_hash_copy_indirect): Updated elfNN_ia64_dyn_sym_info
	processing.
	(elfNN_ia64_hash_hide_symbol): Likewise.
	(elfNN_ia64_global_dyn_sym_thunk): Likewise.
	(elfNN_ia64_local_dyn_sym_thunk): Likewise.
	(elfNN_ia64_global_dyn_info_free): New function.
	(elfNN_ia64_local_dyn_info_free): Likewise.
	(elfNN_ia64_hash_table_free): Free local and global
	elfNN_ia64_dyn_sym_info.
	(addend_compare): New function.
	(get_dyn_sym_info): Updated to use binary search for addend.
	(elfNN_ia64_check_relocs): Scan relocations to create dynamic
	relocation arrays first.

--- bfd/elfxx-ia64.c.hash	2006-03-27 09:06:23.000000000 -0800
+++ bfd/elfxx-ia64.c	2006-03-30 15:25:29.000000000 -0800
@@ -80,9 +80,6 @@ struct elfNN_ia64_dyn_sym_info
   /* The addend for which this entry is relevant.  */
   bfd_vma addend;
 
-  /* Next addend in the list.  */
-  struct elfNN_ia64_dyn_sym_info *next;
-
   bfd_vma got_offset;
   bfd_vma fptr_offset;
   bfd_vma pltoff_offset;
@@ -133,6 +130,9 @@ struct elfNN_ia64_local_hash_entry
 {
   int id;
   unsigned int r_sym;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 
   /* TRUE if this hash entry's addends was translated for
@@ -143,6 +143,9 @@ struct elfNN_ia64_local_hash_entry
 struct elfNN_ia64_link_hash_entry
 {
   struct elf_link_hash_entry root;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 };
 
@@ -1813,6 +1816,9 @@ elfNN_ia64_new_elf_hash_entry (entry, ta
 				     table, string));
 
   ret->info = NULL;
+  ret->count = 0;
+  ret->sorted_count = 0;
+  ret->size = 0;
   return (struct bfd_hash_entry *) ret;
 }
 
@@ -1843,16 +1849,25 @@ elfNN_ia64_hash_copy_indirect (info, xdi
   if (ind->info != NULL)
     {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      struct elfNN_ia64_dyn_sym_info **pdyn;
+      unsigned int count;
+
+      if (dir->info)
+	free (dir->info);
+
+      dir->info = ind->info;
+      dir->count = ind->count;
+      dir->sorted_count = ind->sorted_count;
+      dir->size = ind->size;
 
-      pdyn = &dir->info;
-      while ((dyn_i = *pdyn) != NULL)
-	pdyn = &dyn_i->next;
-      *pdyn = dyn_i = ind->info;
       ind->info = NULL;
+      ind->count = 0;
+      ind->sorted_count = 0;
+      ind->size = 0;
 
       /* Fix up the dyn_sym_info pointers to the global symbol.  */
-      for (; dyn_i; dyn_i = dyn_i->next)
+      for (count = dir->count, dyn_i = dir->info;
+	   count != 0;
+	   count--, dyn_i++)
 	dyn_i->h = &dir->root;
     }
 
@@ -1878,12 +1893,15 @@ elfNN_ia64_hash_hide_symbol (info, xh, f
 {
   struct elfNN_ia64_link_hash_entry *h;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   h = (struct elfNN_ia64_link_hash_entry *)xh;
 
   _bfd_elf_link_hash_hide_symbol (info, &h->root, force_local);
 
-  for (dyn_i = h->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = h->count, dyn_i = h->info;
+       count != 0;
+       count--, dyn_i++)
     {
       dyn_i->want_plt2 = 0;
       dyn_i->want_plt = 0;
@@ -1951,6 +1969,47 @@ elfNN_ia64_hash_table_create (abfd)
   return &ret->root.root;
 }
 
+static bfd_boolean
+elfNN_ia64_global_dyn_info_free (void **xentry,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_link_hash_entry *entry
+    = (struct elfNN_ia64_link_hash_entry *) xentry;
+
+  if (entry->root.root.type == bfd_link_hash_warning)
+    entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
+static bfd_boolean
+elfNN_ia64_local_dyn_info_free (void **slot,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_local_hash_entry *entry
+    = (struct elfNN_ia64_local_hash_entry *) *slot;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
 /* Destroy IA-64 linker hash table.  */
 
 static void
@@ -1960,9 +2019,15 @@ elfNN_ia64_hash_table_free (hash)
   struct elfNN_ia64_link_hash_table *ia64_info
     = (struct elfNN_ia64_link_hash_table *) hash;
   if (ia64_info->loc_hash_table)
-    htab_delete (ia64_info->loc_hash_table);
+    {
+      htab_traverse (ia64_info->loc_hash_table,
+		     elfNN_ia64_local_dyn_info_free, NULL);
+      htab_delete (ia64_info->loc_hash_table);
+    }
   if (ia64_info->loc_hash_memory)
     objalloc_free ((struct objalloc *) ia64_info->loc_hash_memory);
+  elf_link_hash_traverse (&ia64_info->root,
+			  elfNN_ia64_global_dyn_info_free, NULL);
   _bfd_generic_link_hash_table_free (hash);
 }
 
@@ -1984,11 +2049,14 @@ elfNN_ia64_global_dyn_sym_thunk (xentry,
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   if (entry->root.root.type == bfd_link_hash_warning)
     entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
       return FALSE;
   return TRUE;
@@ -2004,11 +2072,14 @@ elfNN_ia64_local_dyn_sym_thunk (slot, xd
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
-      return 0;
-  return 1;
+      return FALSE;
+  return TRUE;
 }
 
 static void
@@ -2117,6 +2188,17 @@ get_local_sym_hash (ia64_info, abfd, rel
   return ret;
 }
 
+static int
+addend_compare (const void *xp, const void *yp)
+{
+  const struct elfNN_ia64_dyn_sym_info *x
+    = (const struct elfNN_ia64_dyn_sym_info *) xp;
+  const struct elfNN_ia64_dyn_sym_info *y
+    = (const struct elfNN_ia64_dyn_sym_info *) yp;
+
+  return x->addend - y->addend;
+}
+
 /* Find and/or create a descriptor for dynamic symbol info.  This will
    vary based on global or local symbol, and the addend to the reloc.  */
 
@@ -2128,12 +2210,24 @@ get_dyn_sym_info (ia64_info, h, abfd, re
      const Elf_Internal_Rela *rel;
      bfd_boolean create;
 {
-  struct elfNN_ia64_dyn_sym_info **pp;
+  struct elfNN_ia64_dyn_sym_info **info_p;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int *count_p, *sorted_count_p, *size_p;
+  unsigned int count;
+  struct elfNN_ia64_dyn_sym_info key;
   bfd_vma addend = rel ? rel->r_addend : 0;
 
   if (h)
-    pp = &((struct elfNN_ia64_link_hash_entry *)h)->info;
+    {
+      struct elfNN_ia64_link_hash_entry *global_h;
+
+      global_h = (struct elfNN_ia64_link_hash_entry *) h;
+      info_p = &global_h->info;
+      count_p = &global_h->count;
+      sorted_count_p = &global_h->sorted_count;
+      size_p = &global_h->size;
+      count = global_h->count;
+    }
   else
     {
       struct elfNN_ia64_local_hash_entry *loc_h;
@@ -2145,18 +2239,159 @@ get_dyn_sym_info (ia64_info, h, abfd, re
 	  return NULL;
 	}
 
-      pp = &loc_h->info;
+      info_p = &loc_h->info;
+      count_p = &loc_h->count;
+      sorted_count_p = &loc_h->sorted_count;
+      size_p = &loc_h->size;
+      count = loc_h->count;
     }
 
-  for (dyn_i = *pp; dyn_i && dyn_i->addend != addend; dyn_i = *pp)
-    pp = &dyn_i->next;
-
-  if (dyn_i == NULL && create)
+  dyn_i = *info_p;
+  if (create)
     {
-      dyn_i = ((struct elfNN_ia64_dyn_sym_info *)
-	       bfd_zalloc (abfd, (bfd_size_type) sizeof *dyn_i));
-      *pp = dyn_i;
+      bfd_size_type amt;
+
+      /* Check the sourted array first.  */
+      if (dyn_i && *sorted_count_p)
+	{
+	  key.addend = addend;
+	  dyn_i = bsearch (&key, dyn_i, *sorted_count_p,
+			   sizeof (*dyn_i), addend_compare);
+
+	  /* Check the last one.  */
+	  if (!dyn_i)
+	    {
+	      dyn_i = *info_p + count - 1;
+	      if (dyn_i->addend == addend)
+		return dyn_i;
+	    }
+	  else
+	    return dyn_i;
+	}
+
+      if (*size_p == 0)
+	{
+	  *size_p = 128;
+	  amt = (*size_p) * sizeof (*dyn_i);
+	  *info_p = bfd_malloc (amt);
+	}
+      else if (*size_p <= count)
+	{
+	  *size_p <<= 1;
+	  amt = (*size_p) * sizeof (*dyn_i);
+	  *info_p = bfd_realloc (*info_p, amt);
+	}
+
+      if (*info_p == NULL)
+	return NULL;
+      
+      dyn_i = *info_p + count;
+      memset (dyn_i, 0, sizeof (*dyn_i));
       dyn_i->addend = addend;
+      
+      /* We increment count only.  */
+      (*count_p)++;
+    }
+  else
+    {
+      /* Sort the array.  */
+      if (count != *sorted_count_p)
+	{
+	  bfd_vma curr, prev;
+	  unsigned int i, dup, diff, dest, src, len;
+
+	  qsort (dyn_i, count, sizeof (*dyn_i), addend_compare);
+
+	  /* Find the first duplicate.  */
+	  prev = dyn_i [0].addend;
+	  for (i = 1; i < count; i++)
+	    {
+	      curr = dyn_i [i].addend;
+	      if (curr == prev)
+		break;
+	      prev = curr;
+	    }
+
+	  /* Remove duplicates.  */
+	  if (i < count)
+	    {
+	      /* We need to move a block of elements to here.  */
+	      dest = i++;
+	      while (i < count)
+		{
+		  curr = dyn_i [i].addend;
+
+		  /* Move a block of elements whose first one is
+		     different from the previous.  */
+		  if (curr == prev)
+		    {
+		      for (src = i + 1; src < count; src++)
+			if (dyn_i [src].addend != curr)
+			  break;
+		    }
+		  else
+		    src = i;
+
+		  if (src >= count)
+		    break;
+
+		  /* Find the next duplicate.  */
+		  prev = dyn_i [src].addend;
+		  for (dup = src + 1; dup < count; dup++)
+		    {
+		      curr = dyn_i [dup].addend;
+		      if (curr == prev)
+			break;
+		      prev = curr;
+		    }
+
+		  /* How much to move.  */
+		  len = dup - src;
+		  i = dup + 1;
+
+		  if (len == 1 && dup < count)
+		    {
+		      /* We only move 1 element. We combine it with the
+			 next one.  Find the next different one.  */
+		      for (diff = dup + 1, src++;
+			   diff < count;
+			   diff++, src++)
+			if (dyn_i [diff].addend != curr)
+			  break;
+
+		      if (diff < count)
+			{
+			  /* Find the next duplicate.  */
+			  prev = dyn_i [diff].addend;
+			  for (dup = diff + 1; dup < count; dup++)
+			    {
+			      curr = dyn_i [dup].addend;
+			      if (curr == prev)
+				break;
+			      prev = curr;
+			      diff++;
+			    }
+
+			  len = diff - src + 1;
+			  i = diff + 1;
+			}
+		    }
+
+		  memmove (&dyn_i [dest], &dyn_i [src],
+			   len * sizeof (*dyn_i));
+
+		  dest += len;
+		}
+
+	      count = dest;
+	      *count_p = count;
+	    }
+	  *sorted_count_p = count;
+	}
+
+      key.addend = addend;
+      dyn_i = bsearch (&key, dyn_i, count,
+		       sizeof (*dyn_i), addend_compare);
     }
 
   return dyn_i;
@@ -2382,6 +2617,23 @@ elfNN_ia64_check_relocs (abfd, info, sec
   Elf_Internal_Shdr *symtab_hdr;
   const Elf_Internal_Rela *rel;
   asection *got, *fptr, *srel, *pltoff;
+  enum {
+    NEED_GOT = 1,
+    NEED_GOTX = 2,
+    NEED_FPTR = 4,
+    NEED_PLTOFF = 8,
+    NEED_MIN_PLT = 16,
+    NEED_FULL_PLT = 32,
+    NEED_DYNREL = 64,
+    NEED_LTOFF_FPTR = 128,
+    NEED_TPREL = 256,
+    NEED_DTPMOD = 512,
+    NEED_DTPREL = 1024
+  };
+  int need_entry;
+  struct elf_link_hash_entry *h;
+  unsigned long r_symndx;
+  bfd_boolean maybe_dynamic;
 
   if (info->relocatable)
     return TRUE;
@@ -2392,29 +2644,178 @@ elfNN_ia64_check_relocs (abfd, info, sec
   got = fptr = srel = pltoff = NULL;
 
   relend = relocs + sec->reloc_count;
+
+  /* We scan relocations first to create dynamic relocation arrays.  */
   for (rel = relocs; rel < relend; ++rel)
     {
-      enum {
-	NEED_GOT = 1,
-	NEED_GOTX = 2,
-	NEED_FPTR = 4,
-	NEED_PLTOFF = 8,
-	NEED_MIN_PLT = 16,
-	NEED_FULL_PLT = 32,
-	NEED_DYNREL = 64,
-	NEED_LTOFF_FPTR = 128,
-	NEED_TPREL = 256,
-	NEED_DTPMOD = 512,
-	NEED_DTPREL = 1024
-      };
+      r_symndx = ELFNN_R_SYM (rel->r_info);
+      if (r_symndx >= symtab_hdr->sh_info)
+	{
+	  long indx = r_symndx - symtab_hdr->sh_info;
+	  h = elf_sym_hashes (abfd)[indx];
+	  while (h->root.type == bfd_link_hash_indirect
+		 || h->root.type == bfd_link_hash_warning)
+	    h = (struct elf_link_hash_entry *) h->root.u.i.link;
+	}
+      else
+	h = NULL;
 
-      struct elf_link_hash_entry *h = NULL;
-      unsigned long r_symndx = ELFNN_R_SYM (rel->r_info);
+      /* We can only get preliminary data on whether a symbol is
+	 locally or externally defined, as not all of the input files
+	 have yet been processed.  Do something with what we know, as
+	 this may help reduce memory usage and processing time later.  */
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
+
+      need_entry = 0;
+      switch (ELFNN_R_TYPE (rel->r_info))
+	{
+	case R_IA64_TPREL64MSB:
+	case R_IA64_TPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_TPREL22:
+	  need_entry = NEED_TPREL;
+	  if (info->shared)
+	    info->flags |= DF_STATIC_TLS;
+	  break;
+
+	case R_IA64_DTPREL32MSB:
+	case R_IA64_DTPREL32LSB:
+	case R_IA64_DTPREL64MSB:
+	case R_IA64_DTPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPREL22:
+	  need_entry = NEED_DTPREL;
+	  break;
+
+	case R_IA64_DTPMOD64MSB:
+	case R_IA64_DTPMOD64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPMOD22:
+	  need_entry = NEED_DTPMOD;
+	  break;
+
+	case R_IA64_LTOFF_FPTR22:
+	case R_IA64_LTOFF_FPTR64I:
+	case R_IA64_LTOFF_FPTR32MSB:
+	case R_IA64_LTOFF_FPTR32LSB:
+	case R_IA64_LTOFF_FPTR64MSB:
+	case R_IA64_LTOFF_FPTR64LSB:
+	  need_entry = NEED_FPTR | NEED_GOT | NEED_LTOFF_FPTR;
+	  break;
+
+	case R_IA64_FPTR64I:
+	case R_IA64_FPTR32MSB:
+	case R_IA64_FPTR32LSB:
+	case R_IA64_FPTR64MSB:
+	case R_IA64_FPTR64LSB:
+	  if (info->shared || h)
+	    need_entry = NEED_FPTR | NEED_DYNREL;
+	  else
+	    need_entry = NEED_FPTR;
+	  break;
+
+	case R_IA64_LTOFF22:
+	case R_IA64_LTOFF64I:
+	  need_entry = NEED_GOT;
+	  break;
+
+	case R_IA64_LTOFF22X:
+	  need_entry = NEED_GOTX;
+	  break;
+
+	case R_IA64_PLTOFF22:
+	case R_IA64_PLTOFF64I:
+	case R_IA64_PLTOFF64MSB:
+	case R_IA64_PLTOFF64LSB:
+	  need_entry = NEED_PLTOFF;
+	  if (h)
+	    {
+	      if (maybe_dynamic)
+		need_entry |= NEED_MIN_PLT;
+	    }
+	  else
+	    {
+	      (*info->callbacks->warning)
+		(info, _("@pltoff reloc against local symbol"), 0,
+		 abfd, 0, (bfd_vma) 0);
+	    }
+	  break;
+
+	case R_IA64_PCREL21B:
+        case R_IA64_PCREL60B:
+	  /* Depending on where this symbol is defined, we may or may not
+	     need a full plt entry.  Only skip if we know we'll not need
+	     the entry -- static or symbolic, and the symbol definition
+	     has already been seen.  */
+	  if (maybe_dynamic && rel->r_addend == 0)
+	    need_entry = NEED_FULL_PLT;
+	  break;
+
+	case R_IA64_IMM14:
+	case R_IA64_IMM22:
+	case R_IA64_IMM64:
+	case R_IA64_DIR32MSB:
+	case R_IA64_DIR32LSB:
+	case R_IA64_DIR64MSB:
+	case R_IA64_DIR64LSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_IPLTMSB:
+	case R_IA64_IPLTLSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_PCREL22:
+	case R_IA64_PCREL64I:
+	case R_IA64_PCREL32MSB:
+	case R_IA64_PCREL32LSB:
+	case R_IA64_PCREL64MSB:
+	case R_IA64_PCREL64LSB:
+	  if (maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+	}
+
+      if (!need_entry)
+	continue;
+
+      if ((need_entry & NEED_FPTR) != 0
+	  && rel->r_addend)
+	{
+	  (*info->callbacks->warning)
+	    (info, _("non-zero addend in @fptr reloc"), 0,
+	     abfd, 0, (bfd_vma) 0);
+	}
+
+      if (get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE) == NULL)
+	return FALSE;
+    }
+
+   
+  for (rel = relocs; rel < relend; ++rel)
+    {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      int need_entry;
-      bfd_boolean maybe_dynamic;
       int dynrel_type = R_IA64_NONE;
 
+      r_symndx = ELFNN_R_SYM (rel->r_info);
       if (r_symndx >= symtab_hdr->sh_info)
 	{
 	  /* We're dealing with a global symbol -- find its hash entry
@@ -2427,18 +2828,18 @@ elfNN_ia64_check_relocs (abfd, info, sec
 
 	  h->ref_regular = 1;
 	}
+      else
+	h = NULL;
 
       /* We can only get preliminary data on whether a symbol is
 	 locally or externally defined, as not all of the input files
 	 have yet been processed.  Do something with what we know, as
 	 this may help reduce memory usage and processing time later.  */
-      maybe_dynamic = FALSE;
-      if (h && ((!info->executable
-		 && (!info->symbolic
-		     || info->unresolved_syms_in_shared_libs == RM_IGNORE))
-		|| !h->def_regular
-		|| h->root.type == bfd_link_hash_defweak))
-	maybe_dynamic = TRUE;
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
 
       need_entry = 0;
       switch (ELFNN_R_TYPE (rel->r_info))
@@ -2522,12 +2923,6 @@ elfNN_ia64_check_relocs (abfd, info, sec
 	      if (maybe_dynamic)
 		need_entry |= NEED_MIN_PLT;
 	    }
-	  else
-	    {
-	      (*info->callbacks->warning)
-		(info, _("@pltoff reloc against local symbol"), 0,
-		 abfd, 0, (bfd_vma) 0);
-	    }
 	  break;
 
 	case R_IA64_PCREL21B:
@@ -2576,15 +2971,7 @@ elfNN_ia64_check_relocs (abfd, info, sec
       if (!need_entry)
 	continue;
 
-      if ((need_entry & NEED_FPTR) != 0
-	  && rel->r_addend)
-	{
-	  (*info->callbacks->warning)
-	    (info, _("non-zero addend in @fptr reloc"), 0,
-	     abfd, 0, (bfd_vma) 0);
-	}
-
-      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE);
+      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, FALSE);
 
       /* Record whether or not this is a local symbol.  */
       dyn_i->h = h;
@@ -4159,8 +4546,11 @@ elfNN_ia64_relocate_section (output_bfd,
 	      if (loc_h && ! loc_h->sec_merge_done)
 		{
 		  struct elfNN_ia64_dyn_sym_info *dynent;
+		  unsigned int count;
 
-		  for (dynent = loc_h->info; dynent; dynent = dynent->next)
+		  for (count = loc_h->count, dynent = loc_h->info;
+		       count != 0;
+		       count--, dynent++)
 		    {
 		      msec = sym_sec;
 		      dynent->addend =
@@ -4175,6 +4565,10 @@ elfNN_ia64_relocate_section (output_bfd,
 					- sym_sec->output_section->vma
 					- sym_sec->output_offset;
 		    }
+		  
+		  qsort (loc_h->info, loc_h->count,
+			 sizeof (*loc_h->info), addend_compare);
+
 		  loc_h->sec_merge_done = 1;
 		}
 	    }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in get_dyn_sym_info)
  2006-03-31  2:18 RFC: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in get_dyn_sym_info) H. J. Lu
@ 2006-03-31 16:35 ` H. J. Lu
  2006-04-02 18:33   ` H. J. Lu
  0 siblings, 1 reply; 9+ messages in thread
From: H. J. Lu @ 2006-03-31 16:35 UTC (permalink / raw)
  To: binutils; +Cc: wilson

On Thu, Mar 30, 2006 at 04:15:52PM -0800, H. J. Lu wrote:
> The ia64 linker uses a linked list to keep track relocations with
> different addends. When we have 100,000 relocations against the
> same symbols with different addends, the linked list becomes extremely
> slow. This patch uses an array to replace the linked list. I have used
> the new linker to rebuild binutils 2 times. Theare are no regressions.
> 
> It cuts down the link time from several hours to about 100 seconds.
> 
> 

Here is an updated patch with some minor adjustments. I was told that
the link time went from 1300 minutes to 1 minute. It was a 1300 X
speed up.


H.J.
----
2006-03-30  H.J. Lu  <hongjiu.lu@intel.com>

	PR ld/2442
	* elfxx-ia64.c (elfNN_ia64_dyn_sym_info): Remove next.
	(elfNN_ia64_local_hash_entry): Add count, sorted_count and
	size.
	(elfNN_ia64_link_hash_entry): Likewise.
	(elfNN_ia64_new_elf_hash_entry): Initialize count, sorted_count
	and size.
	(elfNN_ia64_hash_copy_indirect): Updated elfNN_ia64_dyn_sym_info
	processing.
	(elfNN_ia64_hash_hide_symbol): Likewise.
	(elfNN_ia64_global_dyn_sym_thunk): Likewise.
	(elfNN_ia64_local_dyn_sym_thunk): Likewise.
	(elfNN_ia64_global_dyn_info_free): New function.
	(elfNN_ia64_local_dyn_info_free): Likewise.
	(elfNN_ia64_hash_table_free): Free local and global
	elfNN_ia64_dyn_sym_info.
	(addend_compare): New function.
	(get_dyn_sym_info): Updated to use binary search for addend.
	(elfNN_ia64_check_relocs): Scan relocations to create dynamic
	relocation arrays first.

--- bfd/elfxx-ia64.c.hash	2006-03-27 09:06:23.000000000 -0800
+++ bfd/elfxx-ia64.c	2006-03-31 07:11:48.000000000 -0800
@@ -80,9 +80,6 @@ struct elfNN_ia64_dyn_sym_info
   /* The addend for which this entry is relevant.  */
   bfd_vma addend;
 
-  /* Next addend in the list.  */
-  struct elfNN_ia64_dyn_sym_info *next;
-
   bfd_vma got_offset;
   bfd_vma fptr_offset;
   bfd_vma pltoff_offset;
@@ -133,6 +130,9 @@ struct elfNN_ia64_local_hash_entry
 {
   int id;
   unsigned int r_sym;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 
   /* TRUE if this hash entry's addends was translated for
@@ -143,6 +143,9 @@ struct elfNN_ia64_local_hash_entry
 struct elfNN_ia64_link_hash_entry
 {
   struct elf_link_hash_entry root;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 };
 
@@ -1813,6 +1816,9 @@ elfNN_ia64_new_elf_hash_entry (entry, ta
 				     table, string));
 
   ret->info = NULL;
+  ret->count = 0;
+  ret->sorted_count = 0;
+  ret->size = 0;
   return (struct bfd_hash_entry *) ret;
 }
 
@@ -1843,16 +1849,25 @@ elfNN_ia64_hash_copy_indirect (info, xdi
   if (ind->info != NULL)
     {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      struct elfNN_ia64_dyn_sym_info **pdyn;
+      unsigned int count;
+
+      if (dir->info)
+	free (dir->info);
+
+      dir->info = ind->info;
+      dir->count = ind->count;
+      dir->sorted_count = ind->sorted_count;
+      dir->size = ind->size;
 
-      pdyn = &dir->info;
-      while ((dyn_i = *pdyn) != NULL)
-	pdyn = &dyn_i->next;
-      *pdyn = dyn_i = ind->info;
       ind->info = NULL;
+      ind->count = 0;
+      ind->sorted_count = 0;
+      ind->size = 0;
 
       /* Fix up the dyn_sym_info pointers to the global symbol.  */
-      for (; dyn_i; dyn_i = dyn_i->next)
+      for (count = dir->count, dyn_i = dir->info;
+	   count != 0;
+	   count--, dyn_i++)
 	dyn_i->h = &dir->root;
     }
 
@@ -1878,12 +1893,15 @@ elfNN_ia64_hash_hide_symbol (info, xh, f
 {
   struct elfNN_ia64_link_hash_entry *h;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   h = (struct elfNN_ia64_link_hash_entry *)xh;
 
   _bfd_elf_link_hash_hide_symbol (info, &h->root, force_local);
 
-  for (dyn_i = h->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = h->count, dyn_i = h->info;
+       count != 0;
+       count--, dyn_i++)
     {
       dyn_i->want_plt2 = 0;
       dyn_i->want_plt = 0;
@@ -1951,6 +1969,47 @@ elfNN_ia64_hash_table_create (abfd)
   return &ret->root.root;
 }
 
+static bfd_boolean
+elfNN_ia64_global_dyn_info_free (void **xentry,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_link_hash_entry *entry
+    = (struct elfNN_ia64_link_hash_entry *) xentry;
+
+  if (entry->root.root.type == bfd_link_hash_warning)
+    entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
+static bfd_boolean
+elfNN_ia64_local_dyn_info_free (void **slot,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_local_hash_entry *entry
+    = (struct elfNN_ia64_local_hash_entry *) *slot;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
 /* Destroy IA-64 linker hash table.  */
 
 static void
@@ -1960,9 +2019,15 @@ elfNN_ia64_hash_table_free (hash)
   struct elfNN_ia64_link_hash_table *ia64_info
     = (struct elfNN_ia64_link_hash_table *) hash;
   if (ia64_info->loc_hash_table)
-    htab_delete (ia64_info->loc_hash_table);
+    {
+      htab_traverse (ia64_info->loc_hash_table,
+		     elfNN_ia64_local_dyn_info_free, NULL);
+      htab_delete (ia64_info->loc_hash_table);
+    }
   if (ia64_info->loc_hash_memory)
     objalloc_free ((struct objalloc *) ia64_info->loc_hash_memory);
+  elf_link_hash_traverse (&ia64_info->root,
+			  elfNN_ia64_global_dyn_info_free, NULL);
   _bfd_generic_link_hash_table_free (hash);
 }
 
@@ -1984,11 +2049,14 @@ elfNN_ia64_global_dyn_sym_thunk (xentry,
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   if (entry->root.root.type == bfd_link_hash_warning)
     entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
       return FALSE;
   return TRUE;
@@ -2004,11 +2072,14 @@ elfNN_ia64_local_dyn_sym_thunk (slot, xd
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
-      return 0;
-  return 1;
+      return FALSE;
+  return TRUE;
 }
 
 static void
@@ -2117,6 +2188,17 @@ get_local_sym_hash (ia64_info, abfd, rel
   return ret;
 }
 
+static int
+addend_compare (const void *xp, const void *yp)
+{
+  const struct elfNN_ia64_dyn_sym_info *x
+    = (const struct elfNN_ia64_dyn_sym_info *) xp;
+  const struct elfNN_ia64_dyn_sym_info *y
+    = (const struct elfNN_ia64_dyn_sym_info *) yp;
+
+  return x->addend - y->addend;
+}
+
 /* Find and/or create a descriptor for dynamic symbol info.  This will
    vary based on global or local symbol, and the addend to the reloc.  */
 
@@ -2128,12 +2210,21 @@ get_dyn_sym_info (ia64_info, h, abfd, re
      const Elf_Internal_Rela *rel;
      bfd_boolean create;
 {
-  struct elfNN_ia64_dyn_sym_info **pp;
-  struct elfNN_ia64_dyn_sym_info *dyn_i;
+  struct elfNN_ia64_dyn_sym_info **info_p, *info, *dyn_i, key;
+  unsigned int *count_p, *sorted_count_p, *size_p;
+  unsigned int count, sorted_count, size;
   bfd_vma addend = rel ? rel->r_addend : 0;
 
   if (h)
-    pp = &((struct elfNN_ia64_link_hash_entry *)h)->info;
+    {
+      struct elfNN_ia64_link_hash_entry *global_h;
+
+      global_h = (struct elfNN_ia64_link_hash_entry *) h;
+      info_p = &global_h->info;
+      count_p = &global_h->count;
+      sorted_count_p = &global_h->sorted_count;
+      size_p = &global_h->size;
+    }
   else
     {
       struct elfNN_ia64_local_hash_entry *loc_h;
@@ -2145,18 +2236,170 @@ get_dyn_sym_info (ia64_info, h, abfd, re
 	  return NULL;
 	}
 
-      pp = &loc_h->info;
+      info_p = &loc_h->info;
+      count_p = &loc_h->count;
+      sorted_count_p = &loc_h->sorted_count;
+      size_p = &loc_h->size;
     }
 
-  for (dyn_i = *pp; dyn_i && dyn_i->addend != addend; dyn_i = *pp)
-    pp = &dyn_i->next;
-
-  if (dyn_i == NULL && create)
+  count = *count_p;
+  sorted_count = *sorted_count_p;
+  size = *size_p;
+  info = *info_p;
+  if (create)
     {
-      dyn_i = ((struct elfNN_ia64_dyn_sym_info *)
-	       bfd_zalloc (abfd, (bfd_size_type) sizeof *dyn_i));
-      *pp = dyn_i;
+      bfd_size_type amt;
+
+      /* Check the sorted array first.  */
+      if (info)
+	{
+	  if (sorted_count)
+	    {
+	      key.addend = addend;
+	      dyn_i = bsearch (&key, info, sorted_count,
+			       sizeof (*info), addend_compare);
+
+	      if (dyn_i)
+		return dyn_i;
+	    }
+
+	  /* Check the last one.  */
+	  dyn_i = info + count - 1;
+	  if (dyn_i->addend == addend)
+	    return dyn_i;
+	}
+
+      if (size == 0)
+	{
+	  size = 128;
+	  amt = size * sizeof (*info);
+	  info = bfd_malloc (amt);
+	}
+      else if (size <= count)
+	{
+	  size += size;
+	  amt = size * sizeof (*info);
+	  info = bfd_realloc (info, amt);
+	}
+
+      if (size != *size_p)
+	{
+	  if (info == NULL)
+	    return NULL;
+	  *size_p = size;
+	  *info_p = info;
+	}
+      
+      /* Append the new one to the array.  */
+      dyn_i = info + count;
+      memset (dyn_i, 0, sizeof (*dyn_i));
       dyn_i->addend = addend;
+      
+      /* We increment count only since the new ones are unsorted and
+	 may have duplicate.  */
+      (*count_p)++;
+    }
+  else
+    {
+      /* Sort the array if needed.  */
+      if (count != sorted_count)
+	{
+	  bfd_vma curr, prev;
+	  unsigned int i, dup, diff, dest, src, len;
+
+	  qsort (info, count, sizeof (*info), addend_compare);
+
+	  /* Find the first duplicate.  */
+	  prev = info [0].addend;
+	  for (i = 1; i < count; i++)
+	    {
+	      curr = info [i].addend;
+	      if (curr == prev)
+		break;
+	      prev = curr;
+	    }
+
+	  /* Remove duplicates.  */
+	  if (i < count)
+	    {
+	      /* We need to move a block of elements to here.  */
+	      dest = i++;
+	      while (i < count)
+		{
+		  curr = info [i].addend;
+
+		  /* Move a block of elements whose first one is
+		     different from the previous.  */
+		  if (curr == prev)
+		    {
+		      for (src = i + 1; src < count; src++)
+			if (info [src].addend != curr)
+			  break;
+		    }
+		  else
+		    src = i;
+
+		  if (src >= count)
+		    break;
+
+		  /* Find the next duplicate.  */
+		  prev = info [src].addend;
+		  for (dup = src + 1; dup < count; dup++)
+		    {
+		      curr = info [dup].addend;
+		      if (curr == prev)
+			break;
+		      prev = curr;
+		    }
+
+		  /* How much to move.  */
+		  len = dup - src;
+		  i = dup + 1;
+
+		  if (len == 1 && dup < count)
+		    {
+		      /* If we only move 1 element, we combine it with
+			 the next one.  Find the next different one.  */
+		      for (diff = dup + 1, src++;
+			   diff < count;
+			   diff++, src++)
+			if (info [diff].addend != curr)
+			  break;
+
+		      if (diff < count)
+			{
+			  /* Find the next duplicate.  */
+			  prev = info [diff].addend;
+			  for (dup = diff + 1; dup < count; dup++)
+			    {
+			      curr = info [dup].addend;
+			      if (curr == prev)
+				break;
+			      prev = curr;
+			      diff++;
+			    }
+
+			  len = diff - src + 1;
+			  i = diff + 1;
+			}
+		    }
+
+		  memmove (&info [dest], &info [src],
+			   len * sizeof (*info));
+
+		  dest += len;
+		}
+
+	      count = dest;
+	      *count_p = count;
+	    }
+
+	  *sorted_count_p = count;
+	}
+
+      key.addend = addend;
+      dyn_i = bsearch (&key, info, count,
+		       sizeof (*info), addend_compare);
     }
 
   return dyn_i;
@@ -2382,6 +2625,23 @@ elfNN_ia64_check_relocs (abfd, info, sec
   Elf_Internal_Shdr *symtab_hdr;
   const Elf_Internal_Rela *rel;
   asection *got, *fptr, *srel, *pltoff;
+  enum {
+    NEED_GOT = 1,
+    NEED_GOTX = 2,
+    NEED_FPTR = 4,
+    NEED_PLTOFF = 8,
+    NEED_MIN_PLT = 16,
+    NEED_FULL_PLT = 32,
+    NEED_DYNREL = 64,
+    NEED_LTOFF_FPTR = 128,
+    NEED_TPREL = 256,
+    NEED_DTPMOD = 512,
+    NEED_DTPREL = 1024
+  };
+  int need_entry;
+  struct elf_link_hash_entry *h;
+  unsigned long r_symndx;
+  bfd_boolean maybe_dynamic;
 
   if (info->relocatable)
     return TRUE;
@@ -2392,29 +2652,178 @@ elfNN_ia64_check_relocs (abfd, info, sec
   got = fptr = srel = pltoff = NULL;
 
   relend = relocs + sec->reloc_count;
+
+  /* We scan relocations first to create dynamic relocation arrays.  */
   for (rel = relocs; rel < relend; ++rel)
     {
-      enum {
-	NEED_GOT = 1,
-	NEED_GOTX = 2,
-	NEED_FPTR = 4,
-	NEED_PLTOFF = 8,
-	NEED_MIN_PLT = 16,
-	NEED_FULL_PLT = 32,
-	NEED_DYNREL = 64,
-	NEED_LTOFF_FPTR = 128,
-	NEED_TPREL = 256,
-	NEED_DTPMOD = 512,
-	NEED_DTPREL = 1024
-      };
+      r_symndx = ELFNN_R_SYM (rel->r_info);
+      if (r_symndx >= symtab_hdr->sh_info)
+	{
+	  long indx = r_symndx - symtab_hdr->sh_info;
+	  h = elf_sym_hashes (abfd)[indx];
+	  while (h->root.type == bfd_link_hash_indirect
+		 || h->root.type == bfd_link_hash_warning)
+	    h = (struct elf_link_hash_entry *) h->root.u.i.link;
+	}
+      else
+	h = NULL;
+
+      /* We can only get preliminary data on whether a symbol is
+	 locally or externally defined, as not all of the input files
+	 have yet been processed.  Do something with what we know, as
+	 this may help reduce memory usage and processing time later.  */
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
+
+      need_entry = 0;
+      switch (ELFNN_R_TYPE (rel->r_info))
+	{
+	case R_IA64_TPREL64MSB:
+	case R_IA64_TPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_TPREL22:
+	  need_entry = NEED_TPREL;
+	  if (info->shared)
+	    info->flags |= DF_STATIC_TLS;
+	  break;
+
+	case R_IA64_DTPREL32MSB:
+	case R_IA64_DTPREL32LSB:
+	case R_IA64_DTPREL64MSB:
+	case R_IA64_DTPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
 
-      struct elf_link_hash_entry *h = NULL;
-      unsigned long r_symndx = ELFNN_R_SYM (rel->r_info);
+	case R_IA64_LTOFF_DTPREL22:
+	  need_entry = NEED_DTPREL;
+	  break;
+
+	case R_IA64_DTPMOD64MSB:
+	case R_IA64_DTPMOD64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPMOD22:
+	  need_entry = NEED_DTPMOD;
+	  break;
+
+	case R_IA64_LTOFF_FPTR22:
+	case R_IA64_LTOFF_FPTR64I:
+	case R_IA64_LTOFF_FPTR32MSB:
+	case R_IA64_LTOFF_FPTR32LSB:
+	case R_IA64_LTOFF_FPTR64MSB:
+	case R_IA64_LTOFF_FPTR64LSB:
+	  need_entry = NEED_FPTR | NEED_GOT | NEED_LTOFF_FPTR;
+	  break;
+
+	case R_IA64_FPTR64I:
+	case R_IA64_FPTR32MSB:
+	case R_IA64_FPTR32LSB:
+	case R_IA64_FPTR64MSB:
+	case R_IA64_FPTR64LSB:
+	  if (info->shared || h)
+	    need_entry = NEED_FPTR | NEED_DYNREL;
+	  else
+	    need_entry = NEED_FPTR;
+	  break;
+
+	case R_IA64_LTOFF22:
+	case R_IA64_LTOFF64I:
+	  need_entry = NEED_GOT;
+	  break;
+
+	case R_IA64_LTOFF22X:
+	  need_entry = NEED_GOTX;
+	  break;
+
+	case R_IA64_PLTOFF22:
+	case R_IA64_PLTOFF64I:
+	case R_IA64_PLTOFF64MSB:
+	case R_IA64_PLTOFF64LSB:
+	  need_entry = NEED_PLTOFF;
+	  if (h)
+	    {
+	      if (maybe_dynamic)
+		need_entry |= NEED_MIN_PLT;
+	    }
+	  else
+	    {
+	      (*info->callbacks->warning)
+		(info, _("@pltoff reloc against local symbol"), 0,
+		 abfd, 0, (bfd_vma) 0);
+	    }
+	  break;
+
+	case R_IA64_PCREL21B:
+        case R_IA64_PCREL60B:
+	  /* Depending on where this symbol is defined, we may or may not
+	     need a full plt entry.  Only skip if we know we'll not need
+	     the entry -- static or symbolic, and the symbol definition
+	     has already been seen.  */
+	  if (maybe_dynamic && rel->r_addend == 0)
+	    need_entry = NEED_FULL_PLT;
+	  break;
+
+	case R_IA64_IMM14:
+	case R_IA64_IMM22:
+	case R_IA64_IMM64:
+	case R_IA64_DIR32MSB:
+	case R_IA64_DIR32LSB:
+	case R_IA64_DIR64MSB:
+	case R_IA64_DIR64LSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_IPLTMSB:
+	case R_IA64_IPLTLSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_PCREL22:
+	case R_IA64_PCREL64I:
+	case R_IA64_PCREL32MSB:
+	case R_IA64_PCREL32LSB:
+	case R_IA64_PCREL64MSB:
+	case R_IA64_PCREL64LSB:
+	  if (maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+	}
+
+      if (!need_entry)
+	continue;
+
+      if ((need_entry & NEED_FPTR) != 0
+	  && rel->r_addend)
+	{
+	  (*info->callbacks->warning)
+	    (info, _("non-zero addend in @fptr reloc"), 0,
+	     abfd, 0, (bfd_vma) 0);
+	}
+
+      if (get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE) == NULL)
+	return FALSE;
+    }
+
+   
+  for (rel = relocs; rel < relend; ++rel)
+    {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      int need_entry;
-      bfd_boolean maybe_dynamic;
       int dynrel_type = R_IA64_NONE;
 
+      r_symndx = ELFNN_R_SYM (rel->r_info);
       if (r_symndx >= symtab_hdr->sh_info)
 	{
 	  /* We're dealing with a global symbol -- find its hash entry
@@ -2427,18 +2836,18 @@ elfNN_ia64_check_relocs (abfd, info, sec
 
 	  h->ref_regular = 1;
 	}
+      else
+	h = NULL;
 
       /* We can only get preliminary data on whether a symbol is
 	 locally or externally defined, as not all of the input files
 	 have yet been processed.  Do something with what we know, as
 	 this may help reduce memory usage and processing time later.  */
-      maybe_dynamic = FALSE;
-      if (h && ((!info->executable
-		 && (!info->symbolic
-		     || info->unresolved_syms_in_shared_libs == RM_IGNORE))
-		|| !h->def_regular
-		|| h->root.type == bfd_link_hash_defweak))
-	maybe_dynamic = TRUE;
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
 
       need_entry = 0;
       switch (ELFNN_R_TYPE (rel->r_info))
@@ -2522,12 +2931,6 @@ elfNN_ia64_check_relocs (abfd, info, sec
 	      if (maybe_dynamic)
 		need_entry |= NEED_MIN_PLT;
 	    }
-	  else
-	    {
-	      (*info->callbacks->warning)
-		(info, _("@pltoff reloc against local symbol"), 0,
-		 abfd, 0, (bfd_vma) 0);
-	    }
 	  break;
 
 	case R_IA64_PCREL21B:
@@ -2576,15 +2979,7 @@ elfNN_ia64_check_relocs (abfd, info, sec
       if (!need_entry)
 	continue;
 
-      if ((need_entry & NEED_FPTR) != 0
-	  && rel->r_addend)
-	{
-	  (*info->callbacks->warning)
-	    (info, _("non-zero addend in @fptr reloc"), 0,
-	     abfd, 0, (bfd_vma) 0);
-	}
-
-      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE);
+      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, FALSE);
 
       /* Record whether or not this is a local symbol.  */
       dyn_i->h = h;
@@ -4159,8 +4554,11 @@ elfNN_ia64_relocate_section (output_bfd,
 	      if (loc_h && ! loc_h->sec_merge_done)
 		{
 		  struct elfNN_ia64_dyn_sym_info *dynent;
+		  unsigned int count;
 
-		  for (dynent = loc_h->info; dynent; dynent = dynent->next)
+		  for (count = loc_h->count, dynent = loc_h->info;
+		       count != 0;
+		       count--, dynent++)
 		    {
 		      msec = sym_sec;
 		      dynent->addend =
@@ -4175,6 +4573,10 @@ elfNN_ia64_relocate_section (output_bfd,
 					- sym_sec->output_section->vma
 					- sym_sec->output_offset;
 		    }
+		  
+		  qsort (loc_h->info, loc_h->count,
+			 sizeof (*loc_h->info), addend_compare);
+
 		  loc_h->sec_merge_done = 1;
 		}
 	    }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in get_dyn_sym_info)
  2006-03-31 16:35 ` H. J. Lu
@ 2006-04-02 18:33   ` H. J. Lu
  2006-04-04 19:02     ` James E Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: H. J. Lu @ 2006-04-02 18:33 UTC (permalink / raw)
  To: binutils; +Cc: wilson

On Fri, Mar 31, 2006 at 07:57:12AM -0800, H. J. Lu wrote:
> On Thu, Mar 30, 2006 at 04:15:52PM -0800, H. J. Lu wrote:
> > The ia64 linker uses a linked list to keep track relocations with
> > different addends. When we have 100,000 relocations against the
> > same symbols with different addends, the linked list becomes extremely
> > slow. This patch uses an array to replace the linked list. I have used
> > the new linker to rebuild binutils 2 times. Theare are no regressions.
> > 
> > It cuts down the link time from several hours to about 100 seconds.
> > 
> > 
> 
> Here is an updated patch with some minor adjustments. I was told that
> the link time went from 1300 minutes to 1 minute. It was a 1300 X
> speed up.
> 

This patch is very similar to the last one. The main difference is
it frees the unused portion of elfNN_ia64_dyn_sym_info array. The
speed improvement is the same 1300 X. It may use less memory than
the unmodified linker.


H.J.
----
2006-04-02  H.J. Lu  <hongjiu.lu@intel.com>

	PR ld/2442
	* elfxx-ia64.c (elfNN_ia64_dyn_sym_info): Remove next.
	(elfNN_ia64_local_hash_entry): Add count, sorted_count and
	size.
	(elfNN_ia64_link_hash_entry): Likewise.
	(elfNN_ia64_new_elf_hash_entry): Initialize count, sorted_count
	and size.
	(elfNN_ia64_hash_copy_indirect): Updated elfNN_ia64_dyn_sym_info
	processing.
	(elfNN_ia64_hash_hide_symbol): Likewise.
	(elfNN_ia64_global_dyn_sym_thunk): Likewise.
	(elfNN_ia64_local_dyn_sym_thunk): Likewise.
	(elfNN_ia64_global_dyn_info_free): New function.
	(elfNN_ia64_local_dyn_info_free): Likewise.
	(elfNN_ia64_hash_table_free): Free local and global
	elfNN_ia64_dyn_sym_info.
	(addend_compare): New function.
	(sort_dyn_sym_info): Likewise.
	(get_dyn_sym_info): Updated to use binary search for addend.
	(elfNN_ia64_check_relocs): Scan relocations to create dynamic
	relocation arrays first.

--- bfd/elfxx-ia64.c.hash	2006-03-27 09:06:23.000000000 -0800
+++ bfd/elfxx-ia64.c	2006-04-02 10:35:34.000000000 -0700
@@ -80,9 +80,6 @@ struct elfNN_ia64_dyn_sym_info
   /* The addend for which this entry is relevant.  */
   bfd_vma addend;
 
-  /* Next addend in the list.  */
-  struct elfNN_ia64_dyn_sym_info *next;
-
   bfd_vma got_offset;
   bfd_vma fptr_offset;
   bfd_vma pltoff_offset;
@@ -133,6 +130,9 @@ struct elfNN_ia64_local_hash_entry
 {
   int id;
   unsigned int r_sym;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 
   /* TRUE if this hash entry's addends was translated for
@@ -143,6 +143,9 @@ struct elfNN_ia64_local_hash_entry
 struct elfNN_ia64_link_hash_entry
 {
   struct elf_link_hash_entry root;
+  unsigned int count;
+  unsigned int sorted_count;
+  unsigned int size;
   struct elfNN_ia64_dyn_sym_info *info;
 };
 
@@ -1813,6 +1816,9 @@ elfNN_ia64_new_elf_hash_entry (entry, ta
 				     table, string));
 
   ret->info = NULL;
+  ret->count = 0;
+  ret->sorted_count = 0;
+  ret->size = 0;
   return (struct bfd_hash_entry *) ret;
 }
 
@@ -1843,16 +1849,25 @@ elfNN_ia64_hash_copy_indirect (info, xdi
   if (ind->info != NULL)
     {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      struct elfNN_ia64_dyn_sym_info **pdyn;
+      unsigned int count;
+
+      if (dir->info)
+	free (dir->info);
+
+      dir->info = ind->info;
+      dir->count = ind->count;
+      dir->sorted_count = ind->sorted_count;
+      dir->size = ind->size;
 
-      pdyn = &dir->info;
-      while ((dyn_i = *pdyn) != NULL)
-	pdyn = &dyn_i->next;
-      *pdyn = dyn_i = ind->info;
       ind->info = NULL;
+      ind->count = 0;
+      ind->sorted_count = 0;
+      ind->size = 0;
 
       /* Fix up the dyn_sym_info pointers to the global symbol.  */
-      for (; dyn_i; dyn_i = dyn_i->next)
+      for (count = dir->count, dyn_i = dir->info;
+	   count != 0;
+	   count--, dyn_i++)
 	dyn_i->h = &dir->root;
     }
 
@@ -1878,12 +1893,15 @@ elfNN_ia64_hash_hide_symbol (info, xh, f
 {
   struct elfNN_ia64_link_hash_entry *h;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   h = (struct elfNN_ia64_link_hash_entry *)xh;
 
   _bfd_elf_link_hash_hide_symbol (info, &h->root, force_local);
 
-  for (dyn_i = h->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = h->count, dyn_i = h->info;
+       count != 0;
+       count--, dyn_i++)
     {
       dyn_i->want_plt2 = 0;
       dyn_i->want_plt = 0;
@@ -1951,6 +1969,47 @@ elfNN_ia64_hash_table_create (abfd)
   return &ret->root.root;
 }
 
+static bfd_boolean
+elfNN_ia64_global_dyn_info_free (void **xentry,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_link_hash_entry *entry
+    = (struct elfNN_ia64_link_hash_entry *) xentry;
+
+  if (entry->root.root.type == bfd_link_hash_warning)
+    entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
+static bfd_boolean
+elfNN_ia64_local_dyn_info_free (void **slot,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_local_hash_entry *entry
+    = (struct elfNN_ia64_local_hash_entry *) *slot;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
 /* Destroy IA-64 linker hash table.  */
 
 static void
@@ -1960,9 +2019,15 @@ elfNN_ia64_hash_table_free (hash)
   struct elfNN_ia64_link_hash_table *ia64_info
     = (struct elfNN_ia64_link_hash_table *) hash;
   if (ia64_info->loc_hash_table)
-    htab_delete (ia64_info->loc_hash_table);
+    {
+      htab_traverse (ia64_info->loc_hash_table,
+		     elfNN_ia64_local_dyn_info_free, NULL);
+      htab_delete (ia64_info->loc_hash_table);
+    }
   if (ia64_info->loc_hash_memory)
     objalloc_free ((struct objalloc *) ia64_info->loc_hash_memory);
+  elf_link_hash_traverse (&ia64_info->root,
+			  elfNN_ia64_global_dyn_info_free, NULL);
   _bfd_generic_link_hash_table_free (hash);
 }
 
@@ -1984,11 +2049,14 @@ elfNN_ia64_global_dyn_sym_thunk (xentry,
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   if (entry->root.root.type == bfd_link_hash_warning)
     entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
       return FALSE;
   return TRUE;
@@ -2004,11 +2072,14 @@ elfNN_ia64_local_dyn_sym_thunk (slot, xd
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
-      return 0;
-  return 1;
+      return FALSE;
+  return TRUE;
 }
 
 static void
@@ -2117,6 +2188,110 @@ get_local_sym_hash (ia64_info, abfd, rel
   return ret;
 }
 
+static int
+addend_compare (const void *xp, const void *yp)
+{
+  const struct elfNN_ia64_dyn_sym_info *x
+    = (const struct elfNN_ia64_dyn_sym_info *) xp;
+  const struct elfNN_ia64_dyn_sym_info *y
+    = (const struct elfNN_ia64_dyn_sym_info *) yp;
+
+  return x->addend - y->addend;
+}
+
+static unsigned int
+sort_dyn_sym_info (struct elfNN_ia64_dyn_sym_info *info,
+		   unsigned int count)
+{
+  bfd_vma curr, prev;
+  unsigned int i, dup, diff, dest, src, len;
+
+  qsort (info, count, sizeof (*info), addend_compare);
+
+  /* Find the first duplicate.  */
+  prev = info [0].addend;
+  for (i = 1; i < count; i++)
+    {
+      curr = info [i].addend;
+      if (curr == prev)
+	break;
+      prev = curr;
+    }
+
+  /* Remove duplicates.  */
+  if (i < count)
+    {
+      /* We need to move a block of elements to here.  */
+      dest = i++;
+      while (i < count)
+	{
+	  curr = info [i].addend;
+
+	  /* Move a block of elements whose first one is different from
+	     the previous.  */
+	  if (curr == prev)
+	    {
+	      for (src = i + 1; src < count; src++)
+		if (info [src].addend != curr)
+		  break;
+	    }
+	  else
+	    src = i;
+
+	  if (src >= count)
+	    break;
+
+	  /* Find the next duplicate.  */
+	  prev = info [src].addend;
+	  for (dup = src + 1; dup < count; dup++)
+	    {
+	      curr = info [dup].addend;
+	      if (curr == prev)
+		break;
+	      prev = curr;
+	    }
+
+	  /* How much to move.  */
+	  len = dup - src;
+	  i = dup + 1;
+
+	  if (len == 1 && dup < count)
+	    {
+	      /* If we only move 1 element, we combine it with the next
+		 one.  Find the next different one.  */
+	      for (diff = dup + 1, src++; diff < count; diff++, src++)
+		if (info [diff].addend != curr)
+		  break;
+
+	      if (diff < count)
+		{
+		  /* Find the next duplicate.  */
+		  prev = info [diff].addend;
+		  for (dup = diff + 1; dup < count; dup++)
+		    {
+		      curr = info [dup].addend;
+		      if (curr == prev)
+			break;
+		      prev = curr;
+		      diff++;
+		    }
+
+		  len = diff - src + 1;
+		  i = diff + 1;
+		}
+	    }
+
+	  memmove (&info [dest], &info [src], len * sizeof (*info));
+
+	  dest += len;
+	}
+
+      count = dest;
+    }
+
+  return count;
+}
+
 /* Find and/or create a descriptor for dynamic symbol info.  This will
    vary based on global or local symbol, and the addend to the reloc.  */
 
@@ -2128,12 +2303,22 @@ get_dyn_sym_info (ia64_info, h, abfd, re
      const Elf_Internal_Rela *rel;
      bfd_boolean create;
 {
-  struct elfNN_ia64_dyn_sym_info **pp;
-  struct elfNN_ia64_dyn_sym_info *dyn_i;
+  struct elfNN_ia64_dyn_sym_info **info_p, *info, *dyn_i, key;
+  unsigned int *count_p, *sorted_count_p, *size_p;
+  unsigned int count, sorted_count, size;
   bfd_vma addend = rel ? rel->r_addend : 0;
+  bfd_size_type amt;
 
   if (h)
-    pp = &((struct elfNN_ia64_link_hash_entry *)h)->info;
+    {
+      struct elfNN_ia64_link_hash_entry *global_h;
+
+      global_h = (struct elfNN_ia64_link_hash_entry *) h;
+      info_p = &global_h->info;
+      count_p = &global_h->count;
+      sorted_count_p = &global_h->sorted_count;
+      size_p = &global_h->size;
+    }
   else
     {
       struct elfNN_ia64_local_hash_entry *loc_h;
@@ -2145,18 +2330,98 @@ get_dyn_sym_info (ia64_info, h, abfd, re
 	  return NULL;
 	}
 
-      pp = &loc_h->info;
+      info_p = &loc_h->info;
+      count_p = &loc_h->count;
+      sorted_count_p = &loc_h->sorted_count;
+      size_p = &loc_h->size;
     }
 
-  for (dyn_i = *pp; dyn_i && dyn_i->addend != addend; dyn_i = *pp)
-    pp = &dyn_i->next;
-
-  if (dyn_i == NULL && create)
+  count = *count_p;
+  sorted_count = *sorted_count_p;
+  size = *size_p;
+  info = *info_p;
+  if (create)
     {
-      dyn_i = ((struct elfNN_ia64_dyn_sym_info *)
-	       bfd_zalloc (abfd, (bfd_size_type) sizeof *dyn_i));
-      *pp = dyn_i;
+      /* Check the sorted array first.  */
+      if (info)
+	{
+	  if (sorted_count)
+	    {
+	      key.addend = addend;
+	      dyn_i = bsearch (&key, info, sorted_count,
+			       sizeof (*info), addend_compare);
+
+	      if (dyn_i)
+		{
+		  return dyn_i;
+		}
+	    }
+
+	  /* Check the last one.  */
+	  dyn_i = info + count - 1;
+	  if (dyn_i->addend == addend)
+	    {
+	      return dyn_i;
+	    }
+	}
+
+      if (size == 0)
+	{
+	  size = 1;
+	  amt = size * sizeof (*info);
+	  info = bfd_malloc (amt);
+	}
+      else if (size <= count)
+	{
+	  size += size;
+	  amt = size * sizeof (*info);
+	  info = bfd_realloc (info, amt);
+	}
+      else
+	goto has_space;
+
+      if (info == NULL)
+	return NULL;
+      *size_p = size;
+      *info_p = info;
+
+has_space:
+      /* Append the new one to the array.  */
+      dyn_i = info + count;
+      memset (dyn_i, 0, sizeof (*dyn_i));
       dyn_i->addend = addend;
+      
+      /* We increment count only since the new ones are unsorted and
+	 may have duplicate.  */
+      (*count_p)++;
+    }
+  else
+    {
+      /* Sort the array if needed.  */
+      if (count != sorted_count)
+	{
+	  count = sort_dyn_sym_info (info, count);
+	  *count_p = count;
+	  *sorted_count_p = count;
+	}
+
+      /* Free unused memory.  */
+      if (size != count)
+	{
+	  amt = count * sizeof (*info);
+	  info = bfd_malloc (amt);
+	  if (info != NULL)
+	    {
+	      memcpy (info, *info_p, amt);
+	      free (*info_p);
+	      *size_p = count;
+	      *info_p = info;
+	    }
+	}
+
+      key.addend = addend;
+      dyn_i = bsearch (&key, info, count,
+		       sizeof (*info), addend_compare);
     }
 
   return dyn_i;
@@ -2382,6 +2647,23 @@ elfNN_ia64_check_relocs (abfd, info, sec
   Elf_Internal_Shdr *symtab_hdr;
   const Elf_Internal_Rela *rel;
   asection *got, *fptr, *srel, *pltoff;
+  enum {
+    NEED_GOT = 1,
+    NEED_GOTX = 2,
+    NEED_FPTR = 4,
+    NEED_PLTOFF = 8,
+    NEED_MIN_PLT = 16,
+    NEED_FULL_PLT = 32,
+    NEED_DYNREL = 64,
+    NEED_LTOFF_FPTR = 128,
+    NEED_TPREL = 256,
+    NEED_DTPMOD = 512,
+    NEED_DTPREL = 1024
+  };
+  int need_entry;
+  struct elf_link_hash_entry *h;
+  unsigned long r_symndx;
+  bfd_boolean maybe_dynamic;
 
   if (info->relocatable)
     return TRUE;
@@ -2392,29 +2674,178 @@ elfNN_ia64_check_relocs (abfd, info, sec
   got = fptr = srel = pltoff = NULL;
 
   relend = relocs + sec->reloc_count;
+
+  /* We scan relocations first to create dynamic relocation arrays.  */
   for (rel = relocs; rel < relend; ++rel)
     {
-      enum {
-	NEED_GOT = 1,
-	NEED_GOTX = 2,
-	NEED_FPTR = 4,
-	NEED_PLTOFF = 8,
-	NEED_MIN_PLT = 16,
-	NEED_FULL_PLT = 32,
-	NEED_DYNREL = 64,
-	NEED_LTOFF_FPTR = 128,
-	NEED_TPREL = 256,
-	NEED_DTPMOD = 512,
-	NEED_DTPREL = 1024
-      };
+      r_symndx = ELFNN_R_SYM (rel->r_info);
+      if (r_symndx >= symtab_hdr->sh_info)
+	{
+	  long indx = r_symndx - symtab_hdr->sh_info;
+	  h = elf_sym_hashes (abfd)[indx];
+	  while (h->root.type == bfd_link_hash_indirect
+		 || h->root.type == bfd_link_hash_warning)
+	    h = (struct elf_link_hash_entry *) h->root.u.i.link;
+	}
+      else
+	h = NULL;
+
+      /* We can only get preliminary data on whether a symbol is
+	 locally or externally defined, as not all of the input files
+	 have yet been processed.  Do something with what we know, as
+	 this may help reduce memory usage and processing time later.  */
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
+
+      need_entry = 0;
+      switch (ELFNN_R_TYPE (rel->r_info))
+	{
+	case R_IA64_TPREL64MSB:
+	case R_IA64_TPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
 
-      struct elf_link_hash_entry *h = NULL;
-      unsigned long r_symndx = ELFNN_R_SYM (rel->r_info);
+	case R_IA64_LTOFF_TPREL22:
+	  need_entry = NEED_TPREL;
+	  if (info->shared)
+	    info->flags |= DF_STATIC_TLS;
+	  break;
+
+	case R_IA64_DTPREL32MSB:
+	case R_IA64_DTPREL32LSB:
+	case R_IA64_DTPREL64MSB:
+	case R_IA64_DTPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPREL22:
+	  need_entry = NEED_DTPREL;
+	  break;
+
+	case R_IA64_DTPMOD64MSB:
+	case R_IA64_DTPMOD64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPMOD22:
+	  need_entry = NEED_DTPMOD;
+	  break;
+
+	case R_IA64_LTOFF_FPTR22:
+	case R_IA64_LTOFF_FPTR64I:
+	case R_IA64_LTOFF_FPTR32MSB:
+	case R_IA64_LTOFF_FPTR32LSB:
+	case R_IA64_LTOFF_FPTR64MSB:
+	case R_IA64_LTOFF_FPTR64LSB:
+	  need_entry = NEED_FPTR | NEED_GOT | NEED_LTOFF_FPTR;
+	  break;
+
+	case R_IA64_FPTR64I:
+	case R_IA64_FPTR32MSB:
+	case R_IA64_FPTR32LSB:
+	case R_IA64_FPTR64MSB:
+	case R_IA64_FPTR64LSB:
+	  if (info->shared || h)
+	    need_entry = NEED_FPTR | NEED_DYNREL;
+	  else
+	    need_entry = NEED_FPTR;
+	  break;
+
+	case R_IA64_LTOFF22:
+	case R_IA64_LTOFF64I:
+	  need_entry = NEED_GOT;
+	  break;
+
+	case R_IA64_LTOFF22X:
+	  need_entry = NEED_GOTX;
+	  break;
+
+	case R_IA64_PLTOFF22:
+	case R_IA64_PLTOFF64I:
+	case R_IA64_PLTOFF64MSB:
+	case R_IA64_PLTOFF64LSB:
+	  need_entry = NEED_PLTOFF;
+	  if (h)
+	    {
+	      if (maybe_dynamic)
+		need_entry |= NEED_MIN_PLT;
+	    }
+	  else
+	    {
+	      (*info->callbacks->warning)
+		(info, _("@pltoff reloc against local symbol"), 0,
+		 abfd, 0, (bfd_vma) 0);
+	    }
+	  break;
+
+	case R_IA64_PCREL21B:
+        case R_IA64_PCREL60B:
+	  /* Depending on where this symbol is defined, we may or may not
+	     need a full plt entry.  Only skip if we know we'll not need
+	     the entry -- static or symbolic, and the symbol definition
+	     has already been seen.  */
+	  if (maybe_dynamic && rel->r_addend == 0)
+	    need_entry = NEED_FULL_PLT;
+	  break;
+
+	case R_IA64_IMM14:
+	case R_IA64_IMM22:
+	case R_IA64_IMM64:
+	case R_IA64_DIR32MSB:
+	case R_IA64_DIR32LSB:
+	case R_IA64_DIR64MSB:
+	case R_IA64_DIR64LSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_IPLTMSB:
+	case R_IA64_IPLTLSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_PCREL22:
+	case R_IA64_PCREL64I:
+	case R_IA64_PCREL32MSB:
+	case R_IA64_PCREL32LSB:
+	case R_IA64_PCREL64MSB:
+	case R_IA64_PCREL64LSB:
+	  if (maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+	}
+
+      if (!need_entry)
+	continue;
+
+      if ((need_entry & NEED_FPTR) != 0
+	  && rel->r_addend)
+	{
+	  (*info->callbacks->warning)
+	    (info, _("non-zero addend in @fptr reloc"), 0,
+	     abfd, 0, (bfd_vma) 0);
+	}
+
+      if (get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE) == NULL)
+	return FALSE;
+    }
+
+   
+  for (rel = relocs; rel < relend; ++rel)
+    {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      int need_entry;
-      bfd_boolean maybe_dynamic;
       int dynrel_type = R_IA64_NONE;
 
+      r_symndx = ELFNN_R_SYM (rel->r_info);
       if (r_symndx >= symtab_hdr->sh_info)
 	{
 	  /* We're dealing with a global symbol -- find its hash entry
@@ -2427,18 +2858,18 @@ elfNN_ia64_check_relocs (abfd, info, sec
 
 	  h->ref_regular = 1;
 	}
+      else
+	h = NULL;
 
       /* We can only get preliminary data on whether a symbol is
 	 locally or externally defined, as not all of the input files
 	 have yet been processed.  Do something with what we know, as
 	 this may help reduce memory usage and processing time later.  */
-      maybe_dynamic = FALSE;
-      if (h && ((!info->executable
-		 && (!info->symbolic
-		     || info->unresolved_syms_in_shared_libs == RM_IGNORE))
-		|| !h->def_regular
-		|| h->root.type == bfd_link_hash_defweak))
-	maybe_dynamic = TRUE;
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
 
       need_entry = 0;
       switch (ELFNN_R_TYPE (rel->r_info))
@@ -2522,12 +2953,6 @@ elfNN_ia64_check_relocs (abfd, info, sec
 	      if (maybe_dynamic)
 		need_entry |= NEED_MIN_PLT;
 	    }
-	  else
-	    {
-	      (*info->callbacks->warning)
-		(info, _("@pltoff reloc against local symbol"), 0,
-		 abfd, 0, (bfd_vma) 0);
-	    }
 	  break;
 
 	case R_IA64_PCREL21B:
@@ -2576,15 +3001,7 @@ elfNN_ia64_check_relocs (abfd, info, sec
       if (!need_entry)
 	continue;
 
-      if ((need_entry & NEED_FPTR) != 0
-	  && rel->r_addend)
-	{
-	  (*info->callbacks->warning)
-	    (info, _("non-zero addend in @fptr reloc"), 0,
-	     abfd, 0, (bfd_vma) 0);
-	}
-
-      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE);
+      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, FALSE);
 
       /* Record whether or not this is a local symbol.  */
       dyn_i->h = h;
@@ -4159,8 +4576,11 @@ elfNN_ia64_relocate_section (output_bfd,
 	      if (loc_h && ! loc_h->sec_merge_done)
 		{
 		  struct elfNN_ia64_dyn_sym_info *dynent;
+		  unsigned int count;
 
-		  for (dynent = loc_h->info; dynent; dynent = dynent->next)
+		  for (count = loc_h->count, dynent = loc_h->info;
+		       count != 0;
+		       count--, dynent++)
 		    {
 		      msec = sym_sec;
 		      dynent->addend =
@@ -4175,6 +4595,10 @@ elfNN_ia64_relocate_section (output_bfd,
 					- sym_sec->output_section->vma
 					- sym_sec->output_offset;
 		    }
+		  
+		  qsort (loc_h->info, loc_h->count,
+			 sizeof (*loc_h->info), addend_compare);
+
 		  loc_h->sec_merge_done = 1;
 		}
 	    }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-02 18:33   ` H. J. Lu
@ 2006-04-04 19:02     ` James E Wilson
  2006-04-04 20:31       ` Andreas Schwab
  2006-04-05 20:07       ` H. J. Lu
  0 siblings, 2 replies; 9+ messages in thread
From: James E Wilson @ 2006-04-04 19:02 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

On Sun, 2006-04-02 at 11:33, H. J. Lu wrote:
> This patch is very similar to the last one. The main difference is
> it frees the unused portion of elfNN_ia64_dyn_sym_info array. The
> speed improvement is the same 1300 X. It may use less memory than
> the unmodified linker.

I've looked through the patch.  It looks pretty reasonable to me.

You have created four new functions, but failed to add comments
explaining what they do.  This needs to be fixed before the patch is
checked in.

I had trouble understanding how the patch works.  I had to spend a bit
of time reading and rereading it.  It really needs some better comments
to explain what the new fields count, sorted_count and size mean and how
they are used.  In elfNN_ia64_check_relocs, you have a one line comment
+  /* We scan relocations first to create dynamic relocation arrays.  */
that doesn't adequately explain what is going on.  One reason that it is
hard to understand is that it is followed by over a hundred lines of
code, and it isn't clear what this is referring to.

Also, it would have helped if you had provided an explanation of what
the patch does and how it works.  This is something that should always
be provided with a non-trivial patch.

The missing explanation here is that we place all get_dyn_sym_info (...
TRUE) calls before all get_dyn_sym_info (... FALSE) calls.  We don't
sort when inserting.  Also, we don't check for duplicates, except in the
previously sorted section if one exists, and against the last inserted
entry.  This allows insertions to be fast.  When we see a ...FALSE call,
we sort and eliminate duplicates if there is an unsorted section. 
Typically, this will only happen once, because we do all insertions
before lookups.  We then use bsearch to do a lookup.  This also allows
lookups to be fast.  So we have fast insertion (O(log N) due to
duplicate check), fast lookup (O(log N)) and one sort (O(N log N)
expected time).  Previously, all lookups were O(N) because of the use of
the linked list.  Also, all insertions were O(N) because of the check
for duplicates.  There are some complications here because the array
size grows occasionally, which may add an O(N) factor, but this should
be rare.  Also, you free the excess array allocation, which requires a
copy which is O(N), but this only happens once.

I see that bsearch is already used by the xtensa port, so there should
be no portability problem there.  bsearch is in ISO C99, but not in ISO
C90.  It is also in BSD4.3 and SVID 3, so probably every host we care
about has it.
-- 
Jim Wilson, GNU Tools Support, http://www.specifix.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-04 19:02     ` James E Wilson
@ 2006-04-04 20:31       ` Andreas Schwab
  2006-04-04 20:48         ` James E Wilson
  2006-04-05 20:07       ` H. J. Lu
  1 sibling, 1 reply; 9+ messages in thread
From: Andreas Schwab @ 2006-04-04 20:31 UTC (permalink / raw)
  To: James E Wilson; +Cc: H. J. Lu, binutils

James E Wilson <wilson@specifixinc.com> writes:

> I see that bsearch is already used by the xtensa port, so there should
> be no portability problem there.  bsearch is in ISO C99, but not in ISO
> C90.

AFAIK bsearch is C90, at least that's what K&R2 says.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-04 20:31       ` Andreas Schwab
@ 2006-04-04 20:48         ` James E Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: James E Wilson @ 2006-04-04 20:48 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: H. J. Lu, binutils

On Tue, 2006-04-04 at 13:31, Andreas Schwab wrote:
> AFAIK bsearch is C90, at least that's what K&R2 says.

I was going by the glibc man page.  I should have checked the standard. 
You are right, it is in C90.
-- 
Jim Wilson, GNU Tools Support, http://www.specifix.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-04 19:02     ` James E Wilson
  2006-04-04 20:31       ` Andreas Schwab
@ 2006-04-05 20:07       ` H. J. Lu
  2006-04-05 21:01         ` James E Wilson
  1 sibling, 1 reply; 9+ messages in thread
From: H. J. Lu @ 2006-04-05 20:07 UTC (permalink / raw)
  To: James E Wilson; +Cc: binutils

On Tue, Apr 04, 2006 at 12:02:01PM -0700, James E Wilson wrote:
> On Sun, 2006-04-02 at 11:33, H. J. Lu wrote:
> > This patch is very similar to the last one. The main difference is
> > it frees the unused portion of elfNN_ia64_dyn_sym_info array. The
> > speed improvement is the same 1300 X. It may use less memory than
> > the unmodified linker.
> 
> I've looked through the patch.  It looks pretty reasonable to me.
> 
> You have created four new functions, but failed to add comments
> explaining what they do.  This needs to be fixed before the patch is
> checked in.
> 

Hi Jim,

Thanks for your review. When you spent so much time on the problem,
you forgot that it wasn't as clear as what you though. I incorporated
your comments in the patch. Does it look OK?

Thanks.


H.J.
----
2006-04-05  H.J. Lu  <hongjiu.lu@intel.com>
	    James E Wilson  <wilson@specifixinc.com>

	PR ld/2442
	* elfxx-ia64.c (elfNN_ia64_dyn_sym_info): Remove next.
	(elfNN_ia64_local_hash_entry): Add count, sorted_count and
	size.
	(elfNN_ia64_link_hash_entry): Likewise.
	(elfNN_ia64_new_elf_hash_entry): Initialize count, sorted_count
	and size.
	(elfNN_ia64_hash_copy_indirect): Updated elfNN_ia64_dyn_sym_info
	processing.
	(elfNN_ia64_hash_hide_symbol): Likewise.
	(elfNN_ia64_global_dyn_sym_thunk): Likewise.
	(elfNN_ia64_local_dyn_sym_thunk): Likewise.
	(elfNN_ia64_global_dyn_info_free): New function.
	(elfNN_ia64_local_dyn_info_free): Likewise.
	(elfNN_ia64_hash_table_free): Free local and global
	elfNN_ia64_dyn_sym_info.
	(addend_compare): New function.
	(sort_dyn_sym_info): Likewise.
	(get_dyn_sym_info): Updated to use binary search for addend.
	(elfNN_ia64_check_relocs): Scan relocations to create dynamic
	relocation arrays first.

--- bfd/elfxx-ia64.c.hash	2006-04-05 11:11:55.000000000 -0700
+++ bfd/elfxx-ia64.c	2006-04-05 12:58:38.000000000 -0700
@@ -80,9 +80,6 @@ struct elfNN_ia64_dyn_sym_info
   /* The addend for which this entry is relevant.  */
   bfd_vma addend;
 
-  /* Next addend in the list.  */
-  struct elfNN_ia64_dyn_sym_info *next;
-
   bfd_vma got_offset;
   bfd_vma fptr_offset;
   bfd_vma pltoff_offset;
@@ -133,6 +130,13 @@ struct elfNN_ia64_local_hash_entry
 {
   int id;
   unsigned int r_sym;
+  /* The number of elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int count;
+  /* The number of sorted elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int sorted_count;
+  /* The size of elfNN_ia64_dyn_sym_info array.  */
+  unsigned int size;
+  /* The array of elfNN_ia64_dyn_sym_info.  */
   struct elfNN_ia64_dyn_sym_info *info;
 
   /* TRUE if this hash entry's addends was translated for
@@ -143,6 +147,13 @@ struct elfNN_ia64_local_hash_entry
 struct elfNN_ia64_link_hash_entry
 {
   struct elf_link_hash_entry root;
+  /* The number of elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int count;
+  /* The number of sorted elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int sorted_count;
+  /* The size of elfNN_ia64_dyn_sym_info array.  */
+  unsigned int size;
+  /* The array of elfNN_ia64_dyn_sym_info.  */
   struct elfNN_ia64_dyn_sym_info *info;
 };
 
@@ -1813,6 +1824,9 @@ elfNN_ia64_new_elf_hash_entry (entry, ta
 				     table, string));
 
   ret->info = NULL;
+  ret->count = 0;
+  ret->sorted_count = 0;
+  ret->size = 0;
   return (struct bfd_hash_entry *) ret;
 }
 
@@ -1843,16 +1857,25 @@ elfNN_ia64_hash_copy_indirect (info, xdi
   if (ind->info != NULL)
     {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      struct elfNN_ia64_dyn_sym_info **pdyn;
+      unsigned int count;
+
+      if (dir->info)
+	free (dir->info);
+
+      dir->info = ind->info;
+      dir->count = ind->count;
+      dir->sorted_count = ind->sorted_count;
+      dir->size = ind->size;
 
-      pdyn = &dir->info;
-      while ((dyn_i = *pdyn) != NULL)
-	pdyn = &dyn_i->next;
-      *pdyn = dyn_i = ind->info;
       ind->info = NULL;
+      ind->count = 0;
+      ind->sorted_count = 0;
+      ind->size = 0;
 
       /* Fix up the dyn_sym_info pointers to the global symbol.  */
-      for (; dyn_i; dyn_i = dyn_i->next)
+      for (count = dir->count, dyn_i = dir->info;
+	   count != 0;
+	   count--, dyn_i++)
 	dyn_i->h = &dir->root;
     }
 
@@ -1878,12 +1901,15 @@ elfNN_ia64_hash_hide_symbol (info, xh, f
 {
   struct elfNN_ia64_link_hash_entry *h;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   h = (struct elfNN_ia64_link_hash_entry *)xh;
 
   _bfd_elf_link_hash_hide_symbol (info, &h->root, force_local);
 
-  for (dyn_i = h->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = h->count, dyn_i = h->info;
+       count != 0;
+       count--, dyn_i++)
     {
       dyn_i->want_plt2 = 0;
       dyn_i->want_plt = 0;
@@ -1951,6 +1977,51 @@ elfNN_ia64_hash_table_create (abfd)
   return &ret->root.root;
 }
 
+/* Free the global elfNN_ia64_dyn_sym_info array.  */
+
+static bfd_boolean
+elfNN_ia64_global_dyn_info_free (void **xentry,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_link_hash_entry *entry
+    = (struct elfNN_ia64_link_hash_entry *) xentry;
+
+  if (entry->root.root.type == bfd_link_hash_warning)
+    entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
+/* Free the local elfNN_ia64_dyn_sym_info array.  */
+
+static bfd_boolean
+elfNN_ia64_local_dyn_info_free (void **slot,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_local_hash_entry *entry
+    = (struct elfNN_ia64_local_hash_entry *) *slot;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
 /* Destroy IA-64 linker hash table.  */
 
 static void
@@ -1960,9 +2031,15 @@ elfNN_ia64_hash_table_free (hash)
   struct elfNN_ia64_link_hash_table *ia64_info
     = (struct elfNN_ia64_link_hash_table *) hash;
   if (ia64_info->loc_hash_table)
-    htab_delete (ia64_info->loc_hash_table);
+    {
+      htab_traverse (ia64_info->loc_hash_table,
+		     elfNN_ia64_local_dyn_info_free, NULL);
+      htab_delete (ia64_info->loc_hash_table);
+    }
   if (ia64_info->loc_hash_memory)
     objalloc_free ((struct objalloc *) ia64_info->loc_hash_memory);
+  elf_link_hash_traverse (&ia64_info->root,
+			  elfNN_ia64_global_dyn_info_free, NULL);
   _bfd_generic_link_hash_table_free (hash);
 }
 
@@ -1984,11 +2061,14 @@ elfNN_ia64_global_dyn_sym_thunk (xentry,
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   if (entry->root.root.type == bfd_link_hash_warning)
     entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
       return FALSE;
   return TRUE;
@@ -2004,11 +2084,14 @@ elfNN_ia64_local_dyn_sym_thunk (slot, xd
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
-      return 0;
-  return 1;
+      return FALSE;
+  return TRUE;
 }
 
 static void
@@ -2117,6 +2200,114 @@ get_local_sym_hash (ia64_info, abfd, rel
   return ret;
 }
 
+/* Used to sort elfNN_ia64_dyn_sym_info array.  */
+
+static int
+addend_compare (const void *xp, const void *yp)
+{
+  const struct elfNN_ia64_dyn_sym_info *x
+    = (const struct elfNN_ia64_dyn_sym_info *) xp;
+  const struct elfNN_ia64_dyn_sym_info *y
+    = (const struct elfNN_ia64_dyn_sym_info *) yp;
+
+  return x->addend - y->addend;
+}
+
+/* Sort elfNN_ia64_dyn_sym_info array and remove duplicates.  */
+
+static unsigned int
+sort_dyn_sym_info (struct elfNN_ia64_dyn_sym_info *info,
+		   unsigned int count)
+{
+  bfd_vma curr, prev;
+  unsigned int i, dup, diff, dest, src, len;
+
+  qsort (info, count, sizeof (*info), addend_compare);
+
+  /* Find the first duplicate.  */
+  prev = info [0].addend;
+  for (i = 1; i < count; i++)
+    {
+      curr = info [i].addend;
+      if (curr == prev)
+	break;
+      prev = curr;
+    }
+
+  /* Remove duplicates.  */
+  if (i < count)
+    {
+      /* We need to move a block of elements to here.  */
+      dest = i++;
+      while (i < count)
+	{
+	  curr = info [i].addend;
+
+	  /* Move a block of elements whose first one is different from
+	     the previous.  */
+	  if (curr == prev)
+	    {
+	      for (src = i + 1; src < count; src++)
+		if (info [src].addend != curr)
+		  break;
+	    }
+	  else
+	    src = i;
+
+	  if (src >= count)
+	    break;
+
+	  /* Find the next duplicate.  */
+	  prev = info [src].addend;
+	  for (dup = src + 1; dup < count; dup++)
+	    {
+	      curr = info [dup].addend;
+	      if (curr == prev)
+		break;
+	      prev = curr;
+	    }
+
+	  /* How much to move.  */
+	  len = dup - src;
+	  i = dup + 1;
+
+	  if (len == 1 && dup < count)
+	    {
+	      /* If we only move 1 element, we combine it with the next
+		 one.  Find the next different one.  */
+	      for (diff = dup + 1, src++; diff < count; diff++, src++)
+		if (info [diff].addend != curr)
+		  break;
+
+	      if (diff < count)
+		{
+		  /* Find the next duplicate.  */
+		  prev = info [diff].addend;
+		  for (dup = diff + 1; dup < count; dup++)
+		    {
+		      curr = info [dup].addend;
+		      if (curr == prev)
+			break;
+		      prev = curr;
+		      diff++;
+		    }
+
+		  len = diff - src + 1;
+		  i = diff + 1;
+		}
+	    }
+
+	  memmove (&info [dest], &info [src], len * sizeof (*info));
+
+	  dest += len;
+	}
+
+      count = dest;
+    }
+
+  return count;
+}
+
 /* Find and/or create a descriptor for dynamic symbol info.  This will
    vary based on global or local symbol, and the addend to the reloc.  */
 
@@ -2128,12 +2319,22 @@ get_dyn_sym_info (ia64_info, h, abfd, re
      const Elf_Internal_Rela *rel;
      bfd_boolean create;
 {
-  struct elfNN_ia64_dyn_sym_info **pp;
-  struct elfNN_ia64_dyn_sym_info *dyn_i;
+  struct elfNN_ia64_dyn_sym_info **info_p, *info, *dyn_i, key;
+  unsigned int *count_p, *sorted_count_p, *size_p;
+  unsigned int count, sorted_count, size;
   bfd_vma addend = rel ? rel->r_addend : 0;
+  bfd_size_type amt;
 
   if (h)
-    pp = &((struct elfNN_ia64_link_hash_entry *)h)->info;
+    {
+      struct elfNN_ia64_link_hash_entry *global_h;
+
+      global_h = (struct elfNN_ia64_link_hash_entry *) h;
+      info_p = &global_h->info;
+      count_p = &global_h->count;
+      sorted_count_p = &global_h->sorted_count;
+      size_p = &global_h->size;
+    }
   else
     {
       struct elfNN_ia64_local_hash_entry *loc_h;
@@ -2145,18 +2346,119 @@ get_dyn_sym_info (ia64_info, h, abfd, re
 	  return NULL;
 	}
 
-      pp = &loc_h->info;
-    }
+      info_p = &loc_h->info;
+      count_p = &loc_h->count;
+      sorted_count_p = &loc_h->sorted_count;
+      size_p = &loc_h->size;
+    }
+
+  count = *count_p;
+  sorted_count = *sorted_count_p;
+  size = *size_p;
+  info = *info_p;
+  if (create)
+    {
+      /* When we create the array, we don't check for duplicates,
+         except in the previously sorted section if one exists, and
+	 against the last inserted entry.  This allows insertions to
+	 be fast.  */
+      if (info)
+	{
+	  if (sorted_count)
+	    {
+	      /* Try bsearch first on the sorted section.  */
+	      key.addend = addend;
+	      dyn_i = bsearch (&key, info, sorted_count,
+			       sizeof (*info), addend_compare);
+
+	      if (dyn_i)
+		{
+		  return dyn_i;
+		}
+	    }
 
-  for (dyn_i = *pp; dyn_i && dyn_i->addend != addend; dyn_i = *pp)
-    pp = &dyn_i->next;
+	  /* Do a quick check for the last inserted entry.  */
+	  dyn_i = info + count - 1;
+	  if (dyn_i->addend == addend)
+	    {
+	      return dyn_i;
+	    }
+	}
 
-  if (dyn_i == NULL && create)
-    {
-      dyn_i = ((struct elfNN_ia64_dyn_sym_info *)
-	       bfd_zalloc (abfd, (bfd_size_type) sizeof *dyn_i));
-      *pp = dyn_i;
+      if (size == 0)
+	{
+	  /* It is the very first element. We create the array of size
+	     1.  */
+	  size = 1;
+	  amt = size * sizeof (*info);
+	  info = bfd_malloc (amt);
+	}
+      else if (size <= count)
+	{
+	  /* We double the array size every time when we reach the
+	     size limit.  */
+	  size += size;
+	  amt = size * sizeof (*info);
+	  info = bfd_realloc (info, amt);
+	}
+      else
+	goto has_space;
+
+      if (info == NULL)
+	return NULL;
+      *size_p = size;
+      *info_p = info;
+
+has_space:
+      /* Append the new one to the array.  */
+      dyn_i = info + count;
+      memset (dyn_i, 0, sizeof (*dyn_i));
       dyn_i->addend = addend;
+      
+      /* We increment count only since the new ones are unsorted and
+	 may have duplicate.  */
+      (*count_p)++;
+    }
+  else
+    {
+      /* It is a lookup without insertion. we sort and eliminate
+	 duplicates if there is an unsorted section.  Typically, this
+	 will only happen once, because we do all insertions before
+	 lookups.  We then use bsearch to do a lookup.  This also
+	 allows lookups to be fast.  So we have fast insertion
+	 (O(log N) due to duplicate check), fast lookup (O(log N)) and
+	 one sort (O(N log N) expected time).  Previously, all lookups
+	 were O(N) because of the use of the linked list.  Also, all
+	 insertions were O(N) because of the check for duplicates.
+	 There are some complications here because the array size grows
+	 occasionally, which may add an O(N) factor, but this should be
+	 rare.  Also,  we free the excess array allocation, which
+	 requires a copy which is O(N), but this only happens once.
+	 were O(N) because of the check for duplicates.  */
+      if (count != sorted_count)
+	{
+	  count = sort_dyn_sym_info (info, count);
+	  *count_p = count;
+	  *sorted_count_p = count;
+	}
+
+      /* Free unused memory.  */
+      if (size != count)
+	{
+	  amt = count * sizeof (*info);
+	  info = bfd_malloc (amt);
+	  if (info != NULL)
+	    {
+	      memcpy (info, *info_p, amt);
+	      free (*info_p);
+	      *size_p = count;
+	      *info_p = info;
+	    }
+	}
+
+      key.addend = addend;
+      dyn_i = bsearch (&key, info, count,
+		       sizeof (*info), addend_compare);
     }
 
   return dyn_i;
@@ -2382,6 +2684,23 @@ elfNN_ia64_check_relocs (abfd, info, sec
   Elf_Internal_Shdr *symtab_hdr;
   const Elf_Internal_Rela *rel;
   asection *got, *fptr, *srel, *pltoff;
+  enum {
+    NEED_GOT = 1,
+    NEED_GOTX = 2,
+    NEED_FPTR = 4,
+    NEED_PLTOFF = 8,
+    NEED_MIN_PLT = 16,
+    NEED_FULL_PLT = 32,
+    NEED_DYNREL = 64,
+    NEED_LTOFF_FPTR = 128,
+    NEED_TPREL = 256,
+    NEED_DTPMOD = 512,
+    NEED_DTPREL = 1024
+  };
+  int need_entry;
+  struct elf_link_hash_entry *h;
+  unsigned long r_symndx;
+  bfd_boolean maybe_dynamic;
 
   if (info->relocatable)
     return TRUE;
@@ -2392,29 +2711,181 @@ elfNN_ia64_check_relocs (abfd, info, sec
   got = fptr = srel = pltoff = NULL;
 
   relend = relocs + sec->reloc_count;
+
+  /* We scan relocations first to create dynamic relocation arrays.  We
+     modified get_dyn_sym_info to allow fast insertion and support fast
+     lookup in the next loop.  */
   for (rel = relocs; rel < relend; ++rel)
     {
-      enum {
-	NEED_GOT = 1,
-	NEED_GOTX = 2,
-	NEED_FPTR = 4,
-	NEED_PLTOFF = 8,
-	NEED_MIN_PLT = 16,
-	NEED_FULL_PLT = 32,
-	NEED_DYNREL = 64,
-	NEED_LTOFF_FPTR = 128,
-	NEED_TPREL = 256,
-	NEED_DTPMOD = 512,
-	NEED_DTPREL = 1024
-      };
+      r_symndx = ELFNN_R_SYM (rel->r_info);
+      if (r_symndx >= symtab_hdr->sh_info)
+	{
+	  long indx = r_symndx - symtab_hdr->sh_info;
+	  h = elf_sym_hashes (abfd)[indx];
+	  while (h->root.type == bfd_link_hash_indirect
+		 || h->root.type == bfd_link_hash_warning)
+	    h = (struct elf_link_hash_entry *) h->root.u.i.link;
+	}
+      else
+	h = NULL;
 
-      struct elf_link_hash_entry *h = NULL;
-      unsigned long r_symndx = ELFNN_R_SYM (rel->r_info);
+      /* We can only get preliminary data on whether a symbol is
+	 locally or externally defined, as not all of the input files
+	 have yet been processed.  Do something with what we know, as
+	 this may help reduce memory usage and processing time later.  */
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
+
+      need_entry = 0;
+      switch (ELFNN_R_TYPE (rel->r_info))
+	{
+	case R_IA64_TPREL64MSB:
+	case R_IA64_TPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_TPREL22:
+	  need_entry = NEED_TPREL;
+	  if (info->shared)
+	    info->flags |= DF_STATIC_TLS;
+	  break;
+
+	case R_IA64_DTPREL32MSB:
+	case R_IA64_DTPREL32LSB:
+	case R_IA64_DTPREL64MSB:
+	case R_IA64_DTPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPREL22:
+	  need_entry = NEED_DTPREL;
+	  break;
+
+	case R_IA64_DTPMOD64MSB:
+	case R_IA64_DTPMOD64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPMOD22:
+	  need_entry = NEED_DTPMOD;
+	  break;
+
+	case R_IA64_LTOFF_FPTR22:
+	case R_IA64_LTOFF_FPTR64I:
+	case R_IA64_LTOFF_FPTR32MSB:
+	case R_IA64_LTOFF_FPTR32LSB:
+	case R_IA64_LTOFF_FPTR64MSB:
+	case R_IA64_LTOFF_FPTR64LSB:
+	  need_entry = NEED_FPTR | NEED_GOT | NEED_LTOFF_FPTR;
+	  break;
+
+	case R_IA64_FPTR64I:
+	case R_IA64_FPTR32MSB:
+	case R_IA64_FPTR32LSB:
+	case R_IA64_FPTR64MSB:
+	case R_IA64_FPTR64LSB:
+	  if (info->shared || h)
+	    need_entry = NEED_FPTR | NEED_DYNREL;
+	  else
+	    need_entry = NEED_FPTR;
+	  break;
+
+	case R_IA64_LTOFF22:
+	case R_IA64_LTOFF64I:
+	  need_entry = NEED_GOT;
+	  break;
+
+	case R_IA64_LTOFF22X:
+	  need_entry = NEED_GOTX;
+	  break;
+
+	case R_IA64_PLTOFF22:
+	case R_IA64_PLTOFF64I:
+	case R_IA64_PLTOFF64MSB:
+	case R_IA64_PLTOFF64LSB:
+	  need_entry = NEED_PLTOFF;
+	  if (h)
+	    {
+	      if (maybe_dynamic)
+		need_entry |= NEED_MIN_PLT;
+	    }
+	  else
+	    {
+	      (*info->callbacks->warning)
+		(info, _("@pltoff reloc against local symbol"), 0,
+		 abfd, 0, (bfd_vma) 0);
+	    }
+	  break;
+
+	case R_IA64_PCREL21B:
+        case R_IA64_PCREL60B:
+	  /* Depending on where this symbol is defined, we may or may not
+	     need a full plt entry.  Only skip if we know we'll not need
+	     the entry -- static or symbolic, and the symbol definition
+	     has already been seen.  */
+	  if (maybe_dynamic && rel->r_addend == 0)
+	    need_entry = NEED_FULL_PLT;
+	  break;
+
+	case R_IA64_IMM14:
+	case R_IA64_IMM22:
+	case R_IA64_IMM64:
+	case R_IA64_DIR32MSB:
+	case R_IA64_DIR32LSB:
+	case R_IA64_DIR64MSB:
+	case R_IA64_DIR64LSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_IPLTMSB:
+	case R_IA64_IPLTLSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_PCREL22:
+	case R_IA64_PCREL64I:
+	case R_IA64_PCREL32MSB:
+	case R_IA64_PCREL32LSB:
+	case R_IA64_PCREL64MSB:
+	case R_IA64_PCREL64LSB:
+	  if (maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+	}
+
+      if (!need_entry)
+	continue;
+
+      if ((need_entry & NEED_FPTR) != 0
+	  && rel->r_addend)
+	{
+	  (*info->callbacks->warning)
+	    (info, _("non-zero addend in @fptr reloc"), 0,
+	     abfd, 0, (bfd_vma) 0);
+	}
+
+      if (get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE) == NULL)
+	return FALSE;
+    }
+
+  /* Now, we only do lookup without insertion, which is very fast
+     with the modified get_dyn_sym_info.  */ 
+  for (rel = relocs; rel < relend; ++rel)
+    {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      int need_entry;
-      bfd_boolean maybe_dynamic;
       int dynrel_type = R_IA64_NONE;
 
+      r_symndx = ELFNN_R_SYM (rel->r_info);
       if (r_symndx >= symtab_hdr->sh_info)
 	{
 	  /* We're dealing with a global symbol -- find its hash entry
@@ -2427,18 +2898,18 @@ elfNN_ia64_check_relocs (abfd, info, sec
 
 	  h->ref_regular = 1;
 	}
+      else
+	h = NULL;
 
       /* We can only get preliminary data on whether a symbol is
 	 locally or externally defined, as not all of the input files
 	 have yet been processed.  Do something with what we know, as
 	 this may help reduce memory usage and processing time later.  */
-      maybe_dynamic = FALSE;
-      if (h && ((!info->executable
-		 && (!info->symbolic
-		     || info->unresolved_syms_in_shared_libs == RM_IGNORE))
-		|| !h->def_regular
-		|| h->root.type == bfd_link_hash_defweak))
-	maybe_dynamic = TRUE;
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
 
       need_entry = 0;
       switch (ELFNN_R_TYPE (rel->r_info))
@@ -2522,12 +2993,6 @@ elfNN_ia64_check_relocs (abfd, info, sec
 	      if (maybe_dynamic)
 		need_entry |= NEED_MIN_PLT;
 	    }
-	  else
-	    {
-	      (*info->callbacks->warning)
-		(info, _("@pltoff reloc against local symbol"), 0,
-		 abfd, 0, (bfd_vma) 0);
-	    }
 	  break;
 
 	case R_IA64_PCREL21B:
@@ -2576,15 +3041,7 @@ elfNN_ia64_check_relocs (abfd, info, sec
       if (!need_entry)
 	continue;
 
-      if ((need_entry & NEED_FPTR) != 0
-	  && rel->r_addend)
-	{
-	  (*info->callbacks->warning)
-	    (info, _("non-zero addend in @fptr reloc"), 0,
-	     abfd, 0, (bfd_vma) 0);
-	}
-
-      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE);
+      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, FALSE);
 
       /* Record whether or not this is a local symbol.  */
       dyn_i->h = h;
@@ -4159,8 +4616,11 @@ elfNN_ia64_relocate_section (output_bfd,
 	      if (loc_h && ! loc_h->sec_merge_done)
 		{
 		  struct elfNN_ia64_dyn_sym_info *dynent;
+		  unsigned int count;
 
-		  for (dynent = loc_h->info; dynent; dynent = dynent->next)
+		  for (count = loc_h->count, dynent = loc_h->info;
+		       count != 0;
+		       count--, dynent++)
 		    {
 		      msec = sym_sec;
 		      dynent->addend =
@@ -4175,6 +4635,10 @@ elfNN_ia64_relocate_section (output_bfd,
 					- sym_sec->output_section->vma
 					- sym_sec->output_offset;
 		    }
+		  
+		  qsort (loc_h->info, loc_h->count,
+			 sizeof (*loc_h->info), addend_compare);
+
 		  loc_h->sec_merge_done = 1;
 		}
 	    }

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-05 20:07       ` H. J. Lu
@ 2006-04-05 21:01         ` James E Wilson
  2006-04-05 22:26           ` H. J. Lu
  0 siblings, 1 reply; 9+ messages in thread
From: James E Wilson @ 2006-04-05 21:01 UTC (permalink / raw)
  To: H. J. Lu; +Cc: binutils

On Wed, 2006-04-05 at 13:04, H. J. Lu wrote:
> Thanks for your review. When you spent so much time on the problem,
> you forgot that it wasn't as clear as what you though. I incorporated
> your comments in the patch. Does it look OK?

Yes, it looks OK.

As for the long comment you added before the sort_dyn_sym_info call, I
would put it before the get_dyn_sym_info function, since it describes
the whole function, and why we have this extra complexity.
-- 
Jim Wilson, GNU Tools Support, http://www.specifix.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in  get_dyn_sym_info)
  2006-04-05 21:01         ` James E Wilson
@ 2006-04-05 22:26           ` H. J. Lu
  0 siblings, 0 replies; 9+ messages in thread
From: H. J. Lu @ 2006-04-05 22:26 UTC (permalink / raw)
  To: James E Wilson; +Cc: binutils

On Wed, Apr 05, 2006 at 01:30:38PM -0700, James E Wilson wrote:
> On Wed, 2006-04-05 at 13:04, H. J. Lu wrote:
> > Thanks for your review. When you spent so much time on the problem,
> > you forgot that it wasn't as clear as what you though. I incorporated
> > your comments in the patch. Does it look OK?
> 
> Yes, it looks OK.
> 
> As for the long comment you added before the sort_dyn_sym_info call, I
> would put it before the get_dyn_sym_info function, since it describes
> the whole function, and why we have this extra complexity.

Hi Jim,

I will check this in shortly.

Thanks.


H.J.
---
2006-04-05  H.J. Lu  <hongjiu.lu@intel.com>
	    James E Wilson  <wilson@specifixinc.com>

	PR ld/2442
	* elfxx-ia64.c (elfNN_ia64_dyn_sym_info): Remove next.
	(elfNN_ia64_local_hash_entry): Add count, sorted_count and
	size.
	(elfNN_ia64_link_hash_entry): Likewise.
	(elfNN_ia64_new_elf_hash_entry): Initialize count, sorted_count
	and size.
	(elfNN_ia64_hash_copy_indirect): Updated elfNN_ia64_dyn_sym_info
	processing.
	(elfNN_ia64_hash_hide_symbol): Likewise.
	(elfNN_ia64_global_dyn_sym_thunk): Likewise.
	(elfNN_ia64_local_dyn_sym_thunk): Likewise.
	(elfNN_ia64_global_dyn_info_free): New function.
	(elfNN_ia64_local_dyn_info_free): Likewise.
	(elfNN_ia64_hash_table_free): Free local and global
	elfNN_ia64_dyn_sym_info.
	(addend_compare): New function.
	(sort_dyn_sym_info): Likewise.
	(get_dyn_sym_info): Updated to use binary search for addend.
	(elfNN_ia64_check_relocs): Scan relocations to create dynamic
	relocation arrays first.

--- bfd/elfxx-ia64.c.hash	2006-04-05 11:11:55.000000000 -0700
+++ bfd/elfxx-ia64.c	2006-04-05 13:57:58.000000000 -0700
@@ -80,9 +80,6 @@ struct elfNN_ia64_dyn_sym_info
   /* The addend for which this entry is relevant.  */
   bfd_vma addend;
 
-  /* Next addend in the list.  */
-  struct elfNN_ia64_dyn_sym_info *next;
-
   bfd_vma got_offset;
   bfd_vma fptr_offset;
   bfd_vma pltoff_offset;
@@ -133,6 +130,13 @@ struct elfNN_ia64_local_hash_entry
 {
   int id;
   unsigned int r_sym;
+  /* The number of elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int count;
+  /* The number of sorted elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int sorted_count;
+  /* The size of elfNN_ia64_dyn_sym_info array.  */
+  unsigned int size;
+  /* The array of elfNN_ia64_dyn_sym_info.  */
   struct elfNN_ia64_dyn_sym_info *info;
 
   /* TRUE if this hash entry's addends was translated for
@@ -143,6 +147,13 @@ struct elfNN_ia64_local_hash_entry
 struct elfNN_ia64_link_hash_entry
 {
   struct elf_link_hash_entry root;
+  /* The number of elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int count;
+  /* The number of sorted elements in elfNN_ia64_dyn_sym_info array.  */
+  unsigned int sorted_count;
+  /* The size of elfNN_ia64_dyn_sym_info array.  */
+  unsigned int size;
+  /* The array of elfNN_ia64_dyn_sym_info.  */
   struct elfNN_ia64_dyn_sym_info *info;
 };
 
@@ -1813,6 +1824,9 @@ elfNN_ia64_new_elf_hash_entry (entry, ta
 				     table, string));
 
   ret->info = NULL;
+  ret->count = 0;
+  ret->sorted_count = 0;
+  ret->size = 0;
   return (struct bfd_hash_entry *) ret;
 }
 
@@ -1843,16 +1857,25 @@ elfNN_ia64_hash_copy_indirect (info, xdi
   if (ind->info != NULL)
     {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      struct elfNN_ia64_dyn_sym_info **pdyn;
+      unsigned int count;
+
+      if (dir->info)
+	free (dir->info);
+
+      dir->info = ind->info;
+      dir->count = ind->count;
+      dir->sorted_count = ind->sorted_count;
+      dir->size = ind->size;
 
-      pdyn = &dir->info;
-      while ((dyn_i = *pdyn) != NULL)
-	pdyn = &dyn_i->next;
-      *pdyn = dyn_i = ind->info;
       ind->info = NULL;
+      ind->count = 0;
+      ind->sorted_count = 0;
+      ind->size = 0;
 
       /* Fix up the dyn_sym_info pointers to the global symbol.  */
-      for (; dyn_i; dyn_i = dyn_i->next)
+      for (count = dir->count, dyn_i = dir->info;
+	   count != 0;
+	   count--, dyn_i++)
 	dyn_i->h = &dir->root;
     }
 
@@ -1878,12 +1901,15 @@ elfNN_ia64_hash_hide_symbol (info, xh, f
 {
   struct elfNN_ia64_link_hash_entry *h;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   h = (struct elfNN_ia64_link_hash_entry *)xh;
 
   _bfd_elf_link_hash_hide_symbol (info, &h->root, force_local);
 
-  for (dyn_i = h->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = h->count, dyn_i = h->info;
+       count != 0;
+       count--, dyn_i++)
     {
       dyn_i->want_plt2 = 0;
       dyn_i->want_plt = 0;
@@ -1951,6 +1977,51 @@ elfNN_ia64_hash_table_create (abfd)
   return &ret->root.root;
 }
 
+/* Free the global elfNN_ia64_dyn_sym_info array.  */
+
+static bfd_boolean
+elfNN_ia64_global_dyn_info_free (void **xentry,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_link_hash_entry *entry
+    = (struct elfNN_ia64_link_hash_entry *) xentry;
+
+  if (entry->root.root.type == bfd_link_hash_warning)
+    entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
+/* Free the local elfNN_ia64_dyn_sym_info array.  */
+
+static bfd_boolean
+elfNN_ia64_local_dyn_info_free (void **slot,
+				PTR unused ATTRIBUTE_UNUSED)
+{
+  struct elfNN_ia64_local_hash_entry *entry
+    = (struct elfNN_ia64_local_hash_entry *) *slot;
+
+  if (entry->info)
+    {
+      free (entry->info);
+      entry->info = NULL;
+      entry->count = 0;
+      entry->sorted_count = 0;
+      entry->size = 0;
+    }
+
+  return TRUE;
+}
+
 /* Destroy IA-64 linker hash table.  */
 
 static void
@@ -1960,9 +2031,15 @@ elfNN_ia64_hash_table_free (hash)
   struct elfNN_ia64_link_hash_table *ia64_info
     = (struct elfNN_ia64_link_hash_table *) hash;
   if (ia64_info->loc_hash_table)
-    htab_delete (ia64_info->loc_hash_table);
+    {
+      htab_traverse (ia64_info->loc_hash_table,
+		     elfNN_ia64_local_dyn_info_free, NULL);
+      htab_delete (ia64_info->loc_hash_table);
+    }
   if (ia64_info->loc_hash_memory)
     objalloc_free ((struct objalloc *) ia64_info->loc_hash_memory);
+  elf_link_hash_traverse (&ia64_info->root,
+			  elfNN_ia64_global_dyn_info_free, NULL);
   _bfd_generic_link_hash_table_free (hash);
 }
 
@@ -1984,11 +2061,14 @@ elfNN_ia64_global_dyn_sym_thunk (xentry,
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
   if (entry->root.root.type == bfd_link_hash_warning)
     entry = (struct elfNN_ia64_link_hash_entry *) entry->root.root.u.i.link;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
       return FALSE;
   return TRUE;
@@ -2004,11 +2084,14 @@ elfNN_ia64_local_dyn_sym_thunk (slot, xd
   struct elfNN_ia64_dyn_sym_traverse_data *data
     = (struct elfNN_ia64_dyn_sym_traverse_data *) xdata;
   struct elfNN_ia64_dyn_sym_info *dyn_i;
+  unsigned int count;
 
-  for (dyn_i = entry->info; dyn_i; dyn_i = dyn_i->next)
+  for (count = entry->count, dyn_i = entry->info;
+       count != 0;
+       count--, dyn_i++)
     if (! (*data->func) (dyn_i, data->data))
-      return 0;
-  return 1;
+      return FALSE;
+  return TRUE;
 }
 
 static void
@@ -2117,8 +2200,129 @@ get_local_sym_hash (ia64_info, abfd, rel
   return ret;
 }
 
+/* Used to sort elfNN_ia64_dyn_sym_info array.  */
+
+static int
+addend_compare (const void *xp, const void *yp)
+{
+  const struct elfNN_ia64_dyn_sym_info *x
+    = (const struct elfNN_ia64_dyn_sym_info *) xp;
+  const struct elfNN_ia64_dyn_sym_info *y
+    = (const struct elfNN_ia64_dyn_sym_info *) yp;
+
+  return x->addend - y->addend;
+}
+
+/* Sort elfNN_ia64_dyn_sym_info array and remove duplicates.  */
+
+static unsigned int
+sort_dyn_sym_info (struct elfNN_ia64_dyn_sym_info *info,
+		   unsigned int count)
+{
+  bfd_vma curr, prev;
+  unsigned int i, dup, diff, dest, src, len;
+
+  qsort (info, count, sizeof (*info), addend_compare);
+
+  /* Find the first duplicate.  */
+  prev = info [0].addend;
+  for (i = 1; i < count; i++)
+    {
+      curr = info [i].addend;
+      if (curr == prev)
+	break;
+      prev = curr;
+    }
+
+  /* Remove duplicates.  */
+  if (i < count)
+    {
+      /* We need to move a block of elements to here.  */
+      dest = i++;
+      while (i < count)
+	{
+	  curr = info [i].addend;
+
+	  /* Move a block of elements whose first one is different from
+	     the previous.  */
+	  if (curr == prev)
+	    {
+	      for (src = i + 1; src < count; src++)
+		if (info [src].addend != curr)
+		  break;
+	    }
+	  else
+	    src = i;
+
+	  if (src >= count)
+	    break;
+
+	  /* Find the next duplicate.  */
+	  prev = info [src].addend;
+	  for (dup = src + 1; dup < count; dup++)
+	    {
+	      curr = info [dup].addend;
+	      if (curr == prev)
+		break;
+	      prev = curr;
+	    }
+
+	  /* How much to move.  */
+	  len = dup - src;
+	  i = dup + 1;
+
+	  if (len == 1 && dup < count)
+	    {
+	      /* If we only move 1 element, we combine it with the next
+		 one.  Find the next different one.  */
+	      for (diff = dup + 1, src++; diff < count; diff++, src++)
+		if (info [diff].addend != curr)
+		  break;
+
+	      if (diff < count)
+		{
+		  /* Find the next duplicate.  */
+		  prev = info [diff].addend;
+		  for (dup = diff + 1; dup < count; dup++)
+		    {
+		      curr = info [dup].addend;
+		      if (curr == prev)
+			break;
+		      prev = curr;
+		      diff++;
+		    }
+
+		  len = diff - src + 1;
+		  i = diff + 1;
+		}
+	    }
+
+	  memmove (&info [dest], &info [src], len * sizeof (*info));
+
+	  dest += len;
+	}
+
+      count = dest;
+    }
+
+  return count;
+}
+
 /* Find and/or create a descriptor for dynamic symbol info.  This will
-   vary based on global or local symbol, and the addend to the reloc.  */
+   vary based on global or local symbol, and the addend to the reloc.
+
+   We don't sort when inserting.  Also, we sort and eliminate
+   duplicates if there is an unsorted section.  Typically, this will
+   only happen once, because we do all insertions before lookups.  We
+   then use bsearch to do a lookup.  This also allows lookups to be
+   fast.  So we have fast insertion (O(log N) due to duplicate check),
+   fast lookup (O(log N)) and one sort (O(N log N) expected time).
+   Previously, all lookups were O(N) because of the use of the linked
+   list and also all insertions were O(N) because of the check for
+   duplicates.  There are some complications here because the array
+   size grows occasionally, which may add an O(N) factor, but this
+   should be rare.  Also,  we free the excess array allocation, which
+   requires a copy which is O(N), but this only happens once.  */
 
 static struct elfNN_ia64_dyn_sym_info *
 get_dyn_sym_info (ia64_info, h, abfd, rel, create)
@@ -2128,12 +2332,22 @@ get_dyn_sym_info (ia64_info, h, abfd, re
      const Elf_Internal_Rela *rel;
      bfd_boolean create;
 {
-  struct elfNN_ia64_dyn_sym_info **pp;
-  struct elfNN_ia64_dyn_sym_info *dyn_i;
+  struct elfNN_ia64_dyn_sym_info **info_p, *info, *dyn_i, key;
+  unsigned int *count_p, *sorted_count_p, *size_p;
+  unsigned int count, sorted_count, size;
   bfd_vma addend = rel ? rel->r_addend : 0;
+  bfd_size_type amt;
 
   if (h)
-    pp = &((struct elfNN_ia64_link_hash_entry *)h)->info;
+    {
+      struct elfNN_ia64_link_hash_entry *global_h;
+
+      global_h = (struct elfNN_ia64_link_hash_entry *) h;
+      info_p = &global_h->info;
+      count_p = &global_h->count;
+      sorted_count_p = &global_h->sorted_count;
+      size_p = &global_h->size;
+    }
   else
     {
       struct elfNN_ia64_local_hash_entry *loc_h;
@@ -2145,18 +2359,107 @@ get_dyn_sym_info (ia64_info, h, abfd, re
 	  return NULL;
 	}
 
-      pp = &loc_h->info;
-    }
+      info_p = &loc_h->info;
+      count_p = &loc_h->count;
+      sorted_count_p = &loc_h->sorted_count;
+      size_p = &loc_h->size;
+    }
+
+  count = *count_p;
+  sorted_count = *sorted_count_p;
+  size = *size_p;
+  info = *info_p;
+  if (create)
+    {
+      /* When we create the array, we don't check for duplicates,
+         except in the previously sorted section if one exists, and
+	 against the last inserted entry.  This allows insertions to
+	 be fast.  */
+      if (info)
+	{
+	  if (sorted_count)
+	    {
+	      /* Try bsearch first on the sorted section.  */
+	      key.addend = addend;
+	      dyn_i = bsearch (&key, info, sorted_count,
+			       sizeof (*info), addend_compare);
 
-  for (dyn_i = *pp; dyn_i && dyn_i->addend != addend; dyn_i = *pp)
-    pp = &dyn_i->next;
+	      if (dyn_i)
+		{
+		  return dyn_i;
+		}
+	    }
 
-  if (dyn_i == NULL && create)
-    {
-      dyn_i = ((struct elfNN_ia64_dyn_sym_info *)
-	       bfd_zalloc (abfd, (bfd_size_type) sizeof *dyn_i));
-      *pp = dyn_i;
+	  /* Do a quick check for the last inserted entry.  */
+	  dyn_i = info + count - 1;
+	  if (dyn_i->addend == addend)
+	    {
+	      return dyn_i;
+	    }
+	}
+
+      if (size == 0)
+	{
+	  /* It is the very first element. We create the array of size
+	     1.  */
+	  size = 1;
+	  amt = size * sizeof (*info);
+	  info = bfd_malloc (amt);
+	}
+      else if (size <= count)
+	{
+	  /* We double the array size every time when we reach the
+	     size limit.  */
+	  size += size;
+	  amt = size * sizeof (*info);
+	  info = bfd_realloc (info, amt);
+	}
+      else
+	goto has_space;
+
+      if (info == NULL)
+	return NULL;
+      *size_p = size;
+      *info_p = info;
+
+has_space:
+      /* Append the new one to the array.  */
+      dyn_i = info + count;
+      memset (dyn_i, 0, sizeof (*dyn_i));
       dyn_i->addend = addend;
+      
+      /* We increment count only since the new ones are unsorted and
+	 may have duplicate.  */
+      (*count_p)++;
+    }
+  else
+    {
+      /* It is a lookup without insertion.  Sort array if part of the
+	 array isn't sorted.  */
+      if (count != sorted_count)
+	{
+	  count = sort_dyn_sym_info (info, count);
+	  *count_p = count;
+	  *sorted_count_p = count;
+	}
+
+      /* Free unused memory.  */
+      if (size != count)
+	{
+	  amt = count * sizeof (*info);
+	  info = bfd_malloc (amt);
+	  if (info != NULL)
+	    {
+	      memcpy (info, *info_p, amt);
+	      free (*info_p);
+	      *size_p = count;
+	      *info_p = info;
+	    }
+	}
+
+      key.addend = addend;
+      dyn_i = bsearch (&key, info, count,
+		       sizeof (*info), addend_compare);
     }
 
   return dyn_i;
@@ -2382,6 +2685,23 @@ elfNN_ia64_check_relocs (abfd, info, sec
   Elf_Internal_Shdr *symtab_hdr;
   const Elf_Internal_Rela *rel;
   asection *got, *fptr, *srel, *pltoff;
+  enum {
+    NEED_GOT = 1,
+    NEED_GOTX = 2,
+    NEED_FPTR = 4,
+    NEED_PLTOFF = 8,
+    NEED_MIN_PLT = 16,
+    NEED_FULL_PLT = 32,
+    NEED_DYNREL = 64,
+    NEED_LTOFF_FPTR = 128,
+    NEED_TPREL = 256,
+    NEED_DTPMOD = 512,
+    NEED_DTPREL = 1024
+  };
+  int need_entry;
+  struct elf_link_hash_entry *h;
+  unsigned long r_symndx;
+  bfd_boolean maybe_dynamic;
 
   if (info->relocatable)
     return TRUE;
@@ -2392,29 +2712,181 @@ elfNN_ia64_check_relocs (abfd, info, sec
   got = fptr = srel = pltoff = NULL;
 
   relend = relocs + sec->reloc_count;
+
+  /* We scan relocations first to create dynamic relocation arrays.  We
+     modified get_dyn_sym_info to allow fast insertion and support fast
+     lookup in the next loop.  */
   for (rel = relocs; rel < relend; ++rel)
     {
-      enum {
-	NEED_GOT = 1,
-	NEED_GOTX = 2,
-	NEED_FPTR = 4,
-	NEED_PLTOFF = 8,
-	NEED_MIN_PLT = 16,
-	NEED_FULL_PLT = 32,
-	NEED_DYNREL = 64,
-	NEED_LTOFF_FPTR = 128,
-	NEED_TPREL = 256,
-	NEED_DTPMOD = 512,
-	NEED_DTPREL = 1024
-      };
+      r_symndx = ELFNN_R_SYM (rel->r_info);
+      if (r_symndx >= symtab_hdr->sh_info)
+	{
+	  long indx = r_symndx - symtab_hdr->sh_info;
+	  h = elf_sym_hashes (abfd)[indx];
+	  while (h->root.type == bfd_link_hash_indirect
+		 || h->root.type == bfd_link_hash_warning)
+	    h = (struct elf_link_hash_entry *) h->root.u.i.link;
+	}
+      else
+	h = NULL;
 
-      struct elf_link_hash_entry *h = NULL;
-      unsigned long r_symndx = ELFNN_R_SYM (rel->r_info);
+      /* We can only get preliminary data on whether a symbol is
+	 locally or externally defined, as not all of the input files
+	 have yet been processed.  Do something with what we know, as
+	 this may help reduce memory usage and processing time later.  */
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
+
+      need_entry = 0;
+      switch (ELFNN_R_TYPE (rel->r_info))
+	{
+	case R_IA64_TPREL64MSB:
+	case R_IA64_TPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_TPREL22:
+	  need_entry = NEED_TPREL;
+	  if (info->shared)
+	    info->flags |= DF_STATIC_TLS;
+	  break;
+
+	case R_IA64_DTPREL32MSB:
+	case R_IA64_DTPREL32LSB:
+	case R_IA64_DTPREL64MSB:
+	case R_IA64_DTPREL64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPREL22:
+	  need_entry = NEED_DTPREL;
+	  break;
+
+	case R_IA64_DTPMOD64MSB:
+	case R_IA64_DTPMOD64LSB:
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_LTOFF_DTPMOD22:
+	  need_entry = NEED_DTPMOD;
+	  break;
+
+	case R_IA64_LTOFF_FPTR22:
+	case R_IA64_LTOFF_FPTR64I:
+	case R_IA64_LTOFF_FPTR32MSB:
+	case R_IA64_LTOFF_FPTR32LSB:
+	case R_IA64_LTOFF_FPTR64MSB:
+	case R_IA64_LTOFF_FPTR64LSB:
+	  need_entry = NEED_FPTR | NEED_GOT | NEED_LTOFF_FPTR;
+	  break;
+
+	case R_IA64_FPTR64I:
+	case R_IA64_FPTR32MSB:
+	case R_IA64_FPTR32LSB:
+	case R_IA64_FPTR64MSB:
+	case R_IA64_FPTR64LSB:
+	  if (info->shared || h)
+	    need_entry = NEED_FPTR | NEED_DYNREL;
+	  else
+	    need_entry = NEED_FPTR;
+	  break;
+
+	case R_IA64_LTOFF22:
+	case R_IA64_LTOFF64I:
+	  need_entry = NEED_GOT;
+	  break;
+
+	case R_IA64_LTOFF22X:
+	  need_entry = NEED_GOTX;
+	  break;
+
+	case R_IA64_PLTOFF22:
+	case R_IA64_PLTOFF64I:
+	case R_IA64_PLTOFF64MSB:
+	case R_IA64_PLTOFF64LSB:
+	  need_entry = NEED_PLTOFF;
+	  if (h)
+	    {
+	      if (maybe_dynamic)
+		need_entry |= NEED_MIN_PLT;
+	    }
+	  else
+	    {
+	      (*info->callbacks->warning)
+		(info, _("@pltoff reloc against local symbol"), 0,
+		 abfd, 0, (bfd_vma) 0);
+	    }
+	  break;
+
+	case R_IA64_PCREL21B:
+        case R_IA64_PCREL60B:
+	  /* Depending on where this symbol is defined, we may or may not
+	     need a full plt entry.  Only skip if we know we'll not need
+	     the entry -- static or symbolic, and the symbol definition
+	     has already been seen.  */
+	  if (maybe_dynamic && rel->r_addend == 0)
+	    need_entry = NEED_FULL_PLT;
+	  break;
+
+	case R_IA64_IMM14:
+	case R_IA64_IMM22:
+	case R_IA64_IMM64:
+	case R_IA64_DIR32MSB:
+	case R_IA64_DIR32LSB:
+	case R_IA64_DIR64MSB:
+	case R_IA64_DIR64LSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_IPLTMSB:
+	case R_IA64_IPLTLSB:
+	  /* Shared objects will always need at least a REL relocation.  */
+	  if (info->shared || maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+
+	case R_IA64_PCREL22:
+	case R_IA64_PCREL64I:
+	case R_IA64_PCREL32MSB:
+	case R_IA64_PCREL32LSB:
+	case R_IA64_PCREL64MSB:
+	case R_IA64_PCREL64LSB:
+	  if (maybe_dynamic)
+	    need_entry = NEED_DYNREL;
+	  break;
+	}
+
+      if (!need_entry)
+	continue;
+
+      if ((need_entry & NEED_FPTR) != 0
+	  && rel->r_addend)
+	{
+	  (*info->callbacks->warning)
+	    (info, _("non-zero addend in @fptr reloc"), 0,
+	     abfd, 0, (bfd_vma) 0);
+	}
+
+      if (get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE) == NULL)
+	return FALSE;
+    }
+
+  /* Now, we only do lookup without insertion, which is very fast
+     with the modified get_dyn_sym_info.  */ 
+  for (rel = relocs; rel < relend; ++rel)
+    {
       struct elfNN_ia64_dyn_sym_info *dyn_i;
-      int need_entry;
-      bfd_boolean maybe_dynamic;
       int dynrel_type = R_IA64_NONE;
 
+      r_symndx = ELFNN_R_SYM (rel->r_info);
       if (r_symndx >= symtab_hdr->sh_info)
 	{
 	  /* We're dealing with a global symbol -- find its hash entry
@@ -2427,18 +2899,18 @@ elfNN_ia64_check_relocs (abfd, info, sec
 
 	  h->ref_regular = 1;
 	}
+      else
+	h = NULL;
 
       /* We can only get preliminary data on whether a symbol is
 	 locally or externally defined, as not all of the input files
 	 have yet been processed.  Do something with what we know, as
 	 this may help reduce memory usage and processing time later.  */
-      maybe_dynamic = FALSE;
-      if (h && ((!info->executable
-		 && (!info->symbolic
-		     || info->unresolved_syms_in_shared_libs == RM_IGNORE))
-		|| !h->def_regular
-		|| h->root.type == bfd_link_hash_defweak))
-	maybe_dynamic = TRUE;
+      maybe_dynamic = (h && ((!info->executable
+			      && (!info->symbolic
+				  || info->unresolved_syms_in_shared_libs == RM_IGNORE))
+			     || !h->def_regular
+			     || h->root.type == bfd_link_hash_defweak));
 
       need_entry = 0;
       switch (ELFNN_R_TYPE (rel->r_info))
@@ -2522,12 +2994,6 @@ elfNN_ia64_check_relocs (abfd, info, sec
 	      if (maybe_dynamic)
 		need_entry |= NEED_MIN_PLT;
 	    }
-	  else
-	    {
-	      (*info->callbacks->warning)
-		(info, _("@pltoff reloc against local symbol"), 0,
-		 abfd, 0, (bfd_vma) 0);
-	    }
 	  break;
 
 	case R_IA64_PCREL21B:
@@ -2576,15 +3042,7 @@ elfNN_ia64_check_relocs (abfd, info, sec
       if (!need_entry)
 	continue;
 
-      if ((need_entry & NEED_FPTR) != 0
-	  && rel->r_addend)
-	{
-	  (*info->callbacks->warning)
-	    (info, _("non-zero addend in @fptr reloc"), 0,
-	     abfd, 0, (bfd_vma) 0);
-	}
-
-      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, TRUE);
+      dyn_i = get_dyn_sym_info (ia64_info, h, abfd, rel, FALSE);
 
       /* Record whether or not this is a local symbol.  */
       dyn_i->h = h;
@@ -4159,8 +4617,11 @@ elfNN_ia64_relocate_section (output_bfd,
 	      if (loc_h && ! loc_h->sec_merge_done)
 		{
 		  struct elfNN_ia64_dyn_sym_info *dynent;
+		  unsigned int count;
 
-		  for (dynent = loc_h->info; dynent; dynent = dynent->next)
+		  for (count = loc_h->count, dynent = loc_h->info;
+		       count != 0;
+		       count--, dynent++)
 		    {
 		      msec = sym_sec;
 		      dynent->addend =
@@ -4175,6 +4636,10 @@ elfNN_ia64_relocate_section (output_bfd,
 					- sym_sec->output_section->vma
 					- sym_sec->output_offset;
 		    }
+		  
+		  qsort (loc_h->info, loc_h->count,
+			 sizeof (*loc_h->info), addend_compare);
+
 		  loc_h->sec_merge_done = 1;
 		}
 	    }

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2006-04-05 21:01 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-03-31  2:18 RFC: PATCH: ld/2442: ia64 ld slow with many local relocs (O(N^2) in get_dyn_sym_info) H. J. Lu
2006-03-31 16:35 ` H. J. Lu
2006-04-02 18:33   ` H. J. Lu
2006-04-04 19:02     ` James E Wilson
2006-04-04 20:31       ` Andreas Schwab
2006-04-04 20:48         ` James E Wilson
2006-04-05 20:07       ` H. J. Lu
2006-04-05 21:01         ` James E Wilson
2006-04-05 22:26           ` H. J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).