public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] ld: pe: Improve performance of object file exclude symbol directives
@ 2022-09-02 10:59 Martin Storsjö
  2022-09-05 12:54 ` Nick Clifton
  0 siblings, 1 reply; 8+ messages in thread
From: Martin Storsjö @ 2022-09-02 10:59 UTC (permalink / raw)
  To: binutils

Store the list of excluded symbols in a sorted list, speeding up
checking for duplicates when inserting new entries.

This is done in the same way as is done for exports and imports
(while the previous implementation was done with a linked list,
based on the implementation for aligncomm).

When linking object files with excluded symbols, there can potentially
be very large numbers of excluded symbols (just like builds with
exports can have a large number of exported symbols).

This improves the link performance somewhat, when linking with large
numbers of excluded symbols.

The later actual use of the excluded symbols within pe-dll.c
handles them via an unordered linked list still, though.
---
 ld/deffile.h  |   2 +-
 ld/deffilep.y | 122 ++++++++++++++++++++++++++++++++++++++------------
 ld/pe-dll.c   |   7 ++-
 3 files changed, 97 insertions(+), 34 deletions(-)

diff --git a/ld/deffile.h b/ld/deffile.h
index a247639628e..e6e4cf8eebc 100644
--- a/ld/deffile.h
+++ b/ld/deffile.h
@@ -62,7 +62,6 @@ typedef struct def_file_aligncomm {
 } def_file_aligncomm;
 
 typedef struct def_file_exclude_symbol {
-  struct def_file_exclude_symbol *next;	/* Chain pointer.  */
   char *symbol_name;		/* Name of excluded symbol.  */
 } def_file_exclude_symbol;
 
@@ -101,6 +100,7 @@ typedef struct def_file {
   def_file_aligncomm *aligncomms;
 
   /* From EXCLUDE_SYMBOLS or embedded directives. */
+  int num_exclude_symbols;
   def_file_exclude_symbol *exclude_symbols;
 
 } def_file;
diff --git a/ld/deffilep.y b/ld/deffilep.y
index 27db336e0f5..97f9fa4abaf 100644
--- a/ld/deffilep.y
+++ b/ld/deffilep.y
@@ -497,13 +497,13 @@ def_file_free (def_file *fdef)
       free (c);
     }
 
-  while (fdef->exclude_symbols)
+  if (fdef->exclude_symbols)
     {
-      def_file_exclude_symbol *e = fdef->exclude_symbols;
-
-      fdef->exclude_symbols = fdef->exclude_symbols->next;
-      free (e->symbol_name);
-      free (e);
+      for (i = 0; i < fdef->num_exclude_symbols; i++)
+	{
+	  free (fdef->exclude_symbols[i].symbol_name);
+	}
+      free (fdef->exclude_symbols);
     }
 
   free (fdef);
@@ -949,6 +949,91 @@ def_file_add_import_at (def_file *fdef,
   return i;
 }
 
+/* Search the position of the identical element, or returns the position
+   of the next higher element. If last valid element is smaller, then MAX
+   is returned.  */
+
+static int
+find_exclude_in_list (def_file_exclude_symbol *b, int max,
+		      const char *name, int *is_ident)
+{
+  int e, l, r, p;
+
+  *is_ident = 0;
+  if (!max)
+    return 0;
+  if ((e = strcmp (b[0].symbol_name, name)) <= 0)
+    {
+      if (!e)
+	*is_ident = 1;
+      return 0;
+    }
+  if (max == 1)
+    return 1;
+  if ((e = strcmp (b[max - 1].symbol_name, name)) > 0)
+    return max;
+  else if (!e || max == 2)
+    {
+      if (!e)
+	*is_ident = 1;
+      return max - 1;
+    }
+  l = 0; r = max - 1;
+  while (l < r)
+    {
+      p = (l + r) / 2;
+      e = strcmp (b[p].symbol_name, name);
+      if (!e)
+	{
+	  *is_ident = 1;
+	  return p;
+	}
+      else if (e < 0)
+	r = p - 1;
+      else if (e > 0)
+	l = p + 1;
+    }
+  if ((e = strcmp (b[l].symbol_name, name)) > 0)
+    ++l;
+  else if (!e)
+    *is_ident = 1;
+  return l;
+}
+
+static def_file_exclude_symbol *
+def_file_add_exclude_symbol (def_file *fdef, const char *name, int *is_dup)
+{
+  def_file_exclude_symbol *e;
+  int pos;
+  int max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols, 32);
+
+  /* We need to avoid duplicates.  */
+  *is_dup = 0;
+  pos = find_exclude_in_list (fdef->exclude_symbols, fdef->num_exclude_symbols,
+			      name, is_dup);
+
+  if (*is_dup != 0)
+    return (fdef->exclude_symbols + pos);
+
+  if (fdef->num_exclude_symbols >= max_exclude_symbols)
+    {
+      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
+      if (fdef->exclude_symbols)
+	fdef->exclude_symbols = xrealloc (fdef->exclude_symbols,
+					  max_exclude_symbols * sizeof (def_file_exclude_symbol));
+      else
+	fdef->exclude_symbols = xmalloc (max_exclude_symbols * sizeof (def_file_exclude_symbol));
+    }
+
+  e = fdef->exclude_symbols + pos;
+  if (pos != fdef->num_exclude_symbols)
+    memmove (&e[1], e, (sizeof (def_file_exclude_symbol) * (fdef->num_exclude_symbols - pos)));
+  memset (e, 0, sizeof (def_file_exclude_symbol));
+  e->symbol_name = xstrdup (name);
+  fdef->num_exclude_symbols++;
+  return e;
+}
+
 struct
 {
   char *param;
@@ -1280,30 +1365,9 @@ def_aligncomm (char *str, int align)
 static void
 def_exclude_symbols (char *str)
 {
-  def_file_exclude_symbol *c, *p;
-
-  p = NULL;
-  c = def->exclude_symbols;
-  while (c != NULL)
-    {
-      int e = strcmp (c->symbol_name, str);
-      if (!e)
-        return;
-      c = (p = c)->next;
-    }
+  int is_dup = 0;
 
-  c = xmalloc (sizeof (def_file_exclude_symbol));
-  c->symbol_name = xstrdup (str);
-  if (!p)
-    {
-      c->next = def->exclude_symbols;
-      def->exclude_symbols = c;
-    }
-  else
-    {
-      c->next = p->next;
-      p->next = c;
-    }
+  def_file_add_exclude_symbol (def, str, &is_dup);
 }
 
 static void
diff --git a/ld/pe-dll.c b/ld/pe-dll.c
index fbf180ec0f2..806715b2be2 100644
--- a/ld/pe-dll.c
+++ b/ld/pe-dll.c
@@ -720,11 +720,10 @@ process_def_file_and_drectve (bfd *abfd ATTRIBUTE_UNUSED, struct bfd_link_info *
 
   if (pe_def_file->exclude_symbols)
     {
-      def_file_exclude_symbol *ac = pe_def_file->exclude_symbols;
-      while (ac)
+      for (i = 0; i < pe_def_file->num_exclude_symbols; i++)
 	{
-	  pe_dll_add_excludes (ac->symbol_name, EXCLUDESYMS);
-	  ac = ac->next;
+	  pe_dll_add_excludes (pe_def_file->exclude_symbols[i].symbol_name,
+			       EXCLUDESYMS);
 	}
     }
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-02 10:59 [PATCH] ld: pe: Improve performance of object file exclude symbol directives Martin Storsjö
@ 2022-09-05 12:54 ` Nick Clifton
  2022-09-06  7:40   ` Jan Beulich
  2022-09-06  9:39   ` Martin Storsjö
  0 siblings, 2 replies; 8+ messages in thread
From: Nick Clifton @ 2022-09-05 12:54 UTC (permalink / raw)
  To: Martin Storsjö, binutils

Hi Martin,

> Store the list of excluded symbols in a sorted list, speeding up
> checking for duplicates when inserting new entries.

An excellent idea.

>     /* From EXCLUDE_SYMBOLS or embedded directives. */
> +  int num_exclude_symbols;

Presumably there can never be a negative number of excluded symbols,
so using "unsigned int" here might be more appropriate.


> +  if (fdef->exclude_symbols)

Given that the for() loop to follow checks fdef->num_exclude_symbols,
this if() statement is redundant.

> +      for (i = 0; i < fdef->num_exclude_symbols; i++)
> +	   free (fdef->exclude_symbols[i].symbol_name);
> +      free (fdef->exclude_symbols);

Calling free(NULL) is allowed, so the if() statement is still not needed.

> +/* Search the position of the identical element, or returns the position
> +   of the next higher element. If last valid element is smaller, then MAX
> +   is returned.  */

The comment should explain the use of the IS_IDENT parameter.

It should probably also note that MAX is expected to be the same as the number
of entries in the B array.

> +static int
> +find_exclude_in_list (def_file_exclude_symbol *b, int max,
> +		      const char *name, int *is_ident)

Since we are dealing with array indicies here, I would suggest that the function
should return an unsigned int.  Likewise the MAX parameter should be unsigned.
Plus is_ident should be a boolean pointer, not an integer pointer.


> +static def_file_exclude_symbol *
> +def_file_add_exclude_symbol (def_file *fdef, const char *name, int *is_dup)

I think that IS_DUP should be a "boolean *" not a "int *".


> +  int max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols, 32);

Magic numbers are bad.  Please replace 32 with a defined constant.

> +  if (fdef->num_exclude_symbols >= max_exclude_symbols)

This is sneaky and not very intuitive.  I would prefer it if you had
two fields in the fdef structure, one for the number of allocated
slots in the exclude_symbols array and one for the number of used slots.

Then testing for when the array needs to be extended would be a case
of comparing these two fields.  Using two fields might take up more
space in the structure, but it sure makes a lot more sense to me.


> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);

Given that the point of this patch is to improve performance when there
are a large number of excluded symbols, incrementing the array by 32 slots
at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
or 10240.


> +  e = fdef->exclude_symbols + pos;
> +  if (pos != fdef->num_exclude_symbols)
> +    memmove (&e[1], e, (sizeof (def_file_exclude_symbol) * (fdef->num_exclude_symbols - pos)));

What the ?  OK, so what is going on here ?  At the very least I think that
you are going to need to add a comment to this piece of code.


Cheers
   Nick

Cheers
   Nick



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-05 12:54 ` Nick Clifton
@ 2022-09-06  7:40   ` Jan Beulich
  2022-09-06 11:42     ` Nick Clifton
  2022-09-06  9:39   ` Martin Storsjö
  1 sibling, 1 reply; 8+ messages in thread
From: Jan Beulich @ 2022-09-06  7:40 UTC (permalink / raw)
  To: Nick Clifton, Martin Storsjö; +Cc: binutils

On 05.09.2022 14:54, Nick Clifton via Binutils wrote:
>> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
> 
> Given that the point of this patch is to improve performance when there
> are a large number of excluded symbols, incrementing the array by 32 slots
> at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
> or 10240.

Perhaps double the value, thus not overly much impacting the case of there
being a moderate number of excludes?

Jan

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-05 12:54 ` Nick Clifton
  2022-09-06  7:40   ` Jan Beulich
@ 2022-09-06  9:39   ` Martin Storsjö
  1 sibling, 0 replies; 8+ messages in thread
From: Martin Storsjö @ 2022-09-06  9:39 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils

On Mon, 5 Sep 2022, Nick Clifton wrote:

> Hi Martin,
>
>> Store the list of excluded symbols in a sorted list, speeding up
>> checking for duplicates when inserting new entries.
>
> An excellent idea.

FWIW, regarding most of this review here: This file already has got two 
copies of the exact same routines for inserting structs into a sorted list 
of structs (find_export_in_list, def_file_add_export, find_import_in_list, 
def_file_add_import), so for this patch I just made a third copy of the 
same routines, with the exact same behaviours, but adapted for the 
exclude_symbol structs.

I'd be happy to adjust things based on your review, but then I'll probably 
follow up with a separate patch to adapt the other existing 
implementations based on this feedback.

>
>>     /* From EXCLUDE_SYMBOLS or embedded directives. */
>> +  int num_exclude_symbols;
>
> Presumably there can never be a negative number of excluded symbols,
> so using "unsigned int" here might be more appropriate.

Sure, I can change that.

>
>
>> +  if (fdef->exclude_symbols)
>
> Given that the for() loop to follow checks fdef->num_exclude_symbols,
> this if() statement is redundant.

Sure, I can remove it. (The existing exports/imports lists of structs have 
the exact same structure.)

>> +      for (i = 0; i < fdef->num_exclude_symbols; i++)
>> +	   free (fdef->exclude_symbols[i].symbol_name);
>> +      free (fdef->exclude_symbols);
>
> Calling free(NULL) is allowed, so the if() statement is still not needed.

Sure. (Also preexisting from exports/imports.)

>> +/* Search the position of the identical element, or returns the position
>> +   of the next higher element. If last valid element is smaller, then MAX
>> +   is returned.  */
>
> The comment should explain the use of the IS_IDENT parameter.
>
> It should probably also note that MAX is expected to be the same as the 
> number
> of entries in the B array.

Ok, I can expand the comment to explain these.

>> +static int
>> +find_exclude_in_list (def_file_exclude_symbol *b, int max,
>> +		      const char *name, int *is_ident)
>
> Since we are dealing with array indicies here, I would suggest that the 
> function
> should return an unsigned int.  Likewise the MAX parameter should be 
> unsigned.
> Plus is_ident should be a boolean pointer, not an integer pointer.

Ok, I can change that. (Ditto for the preexisting functions.)

>
>> +static def_file_exclude_symbol *
>> +def_file_add_exclude_symbol (def_file *fdef, const char *name, int 
>> *is_dup)
>
> I think that IS_DUP should be a "boolean *" not a "int *".

Ok

>
>> +  int max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols, 32);
>
> Magic numbers are bad.  Please replace 32 with a defined constant.
>
>> +  if (fdef->num_exclude_symbols >= max_exclude_symbols)
>
> This is sneaky and not very intuitive.  I would prefer it if you had
> two fields in the fdef structure, one for the number of allocated
> slots in the exclude_symbols array and one for the number of used slots.
>
> Then testing for when the array needs to be extended would be a case
> of comparing these two fields.  Using two fields might take up more
> space in the structure, but it sure makes a lot more sense to me.

This was also designed according to the existing code - but yes, I agree 
that using a separate variable keeping track of the allocation is much 
safer and clearer. (It took a while to think through to see how this works 
for the initial allocation and all that.)

>> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
>
> Given that the point of this patch is to improve performance when there
> are a large number of excluded symbols, incrementing the array by 32 slots
> at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
> or 10240.

Sure, or doubling as Jan suggested.

>> +  e = fdef->exclude_symbols + pos;
>> +  if (pos != fdef->num_exclude_symbols)
>> +    memmove (&e[1], e, (sizeof (def_file_exclude_symbol) * 
>> (fdef->num_exclude_symbols - pos)));
>
> What the ?  OK, so what is going on here ?  At the very least I think that
> you are going to need to add a comment to this piece of code.

Sure. (This is also preexisting code for the other lists.) As this inserts 
into a sorted list of structs, it moves the later structs forward when 
inserting in the middle of the list.

// Martin


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-06  7:40   ` Jan Beulich
@ 2022-09-06 11:42     ` Nick Clifton
  2022-09-06 11:54       ` Jan Beulich
  0 siblings, 1 reply; 8+ messages in thread
From: Nick Clifton @ 2022-09-06 11:42 UTC (permalink / raw)
  To: Jan Beulich, Martin Storsjö; +Cc: binutils

Hi Jan,

> On 05.09.2022 14:54, Nick Clifton via Binutils wrote:
>>> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
>>
>> Given that the point of this patch is to improve performance when there
>> are a large number of excluded symbols, incrementing the array by 32 slots
>> at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
>> or 10240.
> 
> Perhaps double the value, thus not overly much impacting the case of there
> being a moderate number of excludes?

To be honest I have no idea what a "large number of excludes" might look like.
So maybe 32 is actually a sensible increment.  Doubling the increment every
time the limit is reached could lead to resource exhaustion issues in extreme
cases, but I doubt if that will ever happen in real life, so that works for me
too.

Cheers
   Nick



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-06 11:42     ` Nick Clifton
@ 2022-09-06 11:54       ` Jan Beulich
  2022-09-06 11:59         ` Martin Storsjö
  2022-09-06 13:06         ` Nick Clifton
  0 siblings, 2 replies; 8+ messages in thread
From: Jan Beulich @ 2022-09-06 11:54 UTC (permalink / raw)
  To: Nick Clifton; +Cc: binutils, Martin Storsjö

On 06.09.2022 13:42, Nick Clifton wrote:
>> On 05.09.2022 14:54, Nick Clifton via Binutils wrote:
>>>> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
>>>
>>> Given that the point of this patch is to improve performance when there
>>> are a large number of excluded symbols, incrementing the array by 32 slots
>>> at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
>>> or 10240.
>>
>> Perhaps double the value, thus not overly much impacting the case of there
>> being a moderate number of excludes?
> 
> To be honest I have no idea what a "large number of excludes" might look like.
> So maybe 32 is actually a sensible increment.  Doubling the increment every
> time the limit is reached could lead to resource exhaustion issues in extreme
> cases, but I doubt if that will ever happen in real life, so that works for me
> too.

Well, first I was thinking of a hybrid approach - double until reaching 1024,
then increment further by 1024. But then this seemed to be going a little too
far, so I suggested the simpler alternative. Thoughts?

Jan


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-06 11:54       ` Jan Beulich
@ 2022-09-06 11:59         ` Martin Storsjö
  2022-09-06 13:06         ` Nick Clifton
  1 sibling, 0 replies; 8+ messages in thread
From: Martin Storsjö @ 2022-09-06 11:59 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Nick Clifton, binutils

On Tue, 6 Sep 2022, Jan Beulich wrote:

> On 06.09.2022 13:42, Nick Clifton wrote:
>>> On 05.09.2022 14:54, Nick Clifton via Binutils wrote:
>>>>> +      max_exclude_symbols = ROUND_UP(fdef->num_exclude_symbols + 1, 32);
>>>>
>>>> Given that the point of this patch is to improve performance when there
>>>> are a large number of excluded symbols, incrementing the array by 32 slots
>>>> at a time seems counter intuitive.  I would suggest a bigger number, eg 1024
>>>> or 10240.
>>>
>>> Perhaps double the value, thus not overly much impacting the case of there
>>> being a moderate number of excludes?
>>
>> To be honest I have no idea what a "large number of excludes" might look like.
>> So maybe 32 is actually a sensible increment.  Doubling the increment every
>> time the limit is reached could lead to resource exhaustion issues in extreme
>> cases, but I doubt if that will ever happen in real life, so that works for me
>> too.
>
> Well, first I was thinking of a hybrid approach - double until reaching 1024,
> then increment further by 1024. But then this seemed to be going a little too
> far, so I suggested the simpler alternative. Thoughts?

FWIW, for the case I'm looking at, the build runs through 52k embedded 
-exclude-symbols: directives, and out of those, there are 29k unique 
symbols excluded.

// Martin


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] ld: pe: Improve performance of object file exclude symbol directives
  2022-09-06 11:54       ` Jan Beulich
  2022-09-06 11:59         ` Martin Storsjö
@ 2022-09-06 13:06         ` Nick Clifton
  1 sibling, 0 replies; 8+ messages in thread
From: Nick Clifton @ 2022-09-06 13:06 UTC (permalink / raw)
  To: Jan Beulich; +Cc: binutils, Martin Storsjö

Hi Jan,

> Well, first I was thinking of a hybrid approach - double until reaching 1024,
> then increment further by 1024. But then this seemed to be going a little too
> far, so I suggested the simpler alternative. Thoughts?

I came to exactly the same conclusion.  We are definitely at risk of over-engineering
this problem.

My feeling is that the best thing to do is to choose a constant, any constant,
set it in a #define FOO <N>  and then use FOO in all the right places.  Then, if
at a later date we discover that <N> is too small/big we can replace it with
another value, or even a call to a function.

Cheers
   Nick



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-09-06 13:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-02 10:59 [PATCH] ld: pe: Improve performance of object file exclude symbol directives Martin Storsjö
2022-09-05 12:54 ` Nick Clifton
2022-09-06  7:40   ` Jan Beulich
2022-09-06 11:42     ` Nick Clifton
2022-09-06 11:54       ` Jan Beulich
2022-09-06 11:59         ` Martin Storsjö
2022-09-06 13:06         ` Nick Clifton
2022-09-06  9:39   ` Martin Storsjö

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).