PATCH: use hashtab for pseudo op table

public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed

* PATCH: use hashtab for pseudo op table
@ 2005-04-29  8:21 Ben Elliston
  2005-04-29  8:52 ` Zack Weinberg
  0 siblings, 1 reply; 17+ messages in thread
From: Ben Elliston @ 2005-04-29  8:21 UTC (permalink / raw)
  To: binutils

[-- Attachment #1: Type: text/plain, Size: 5728 bytes --]

This patch is a first step toward eliminating the use of gas/hash.c in favour of
libiberty's hashtab.  Tested with an all-targets `make check' in the gas directory.

Okay for mainline?

2005-04-29  Ben Elliston  <bje@au.ibm.com>

	* read.c (po_hash): Change type to htab_t.
	(pop_override_ok): Remove unneeded global variable.
	(pop_insert): Use htab functions for insertion.
	(pobegin): Use htab_create_alloc. Remove uses of pop_override_ok.
	(hash_pseudo): New static function.
	(pseudo_equal_p): Likewise.
	(read_print_statistics): Remove.
	(read_a_source_file): Use htab_find.
	(s_macro): Likewise.
	* as.c (dump_statistics): Don't call read_print_statistics ().

Index: as.c
===================================================================
RCS file: /home/bje/src-cvs/src/gas/as.c,v
retrieving revision 1.61
diff -u -p -r1.61 as.c
--- as.c	29 Apr 2005 00:22:26 -0000	1.61
+++ as.c	29 Apr 2005 07:54:18 -0000
@@ -926,7 +926,6 @@ dump_statistics (void)
   subsegs_print_statistics (stderr);
   write_print_statistics (stderr);
   symbol_print_statistics (stderr);
-  read_print_statistics (stderr);

 #ifdef tc_print_statistics
   tc_print_statistics (stderr);
Index: read.c
===================================================================
RCS file: /home/bje/src-cvs/src/gas/read.c,v
retrieving revision 1.100
diff -u -p -r1.100 read.c
--- read.c	29 Apr 2005 00:22:26 -0000	1.100
+++ read.c	29 Apr 2005 07:54:18 -0000
@@ -41,6 +41,7 @@ Software Foundation, 59 Temple Place - S
 #include "listing.h"
 #include "ecoff.h"
 #include "dw2gencfi.h"
+#include "hashtab.h"

 #ifndef TC_START_LABEL
 #define TC_START_LABEL(x,y) (x == ':')
@@ -264,7 +265,7 @@ address_bytes (void)

 /* Set up pseudo-op tables.  */

-static struct hash_control *po_hash;
+static htab_t po_hash;

 static const pseudo_typeS potable[] = {
   {"abort", s_abort, 0},
@@ -460,20 +461,27 @@ get_absolute_expression (void)
   return get_absolute_expr (&exp);
 }

-static int pop_override_ok = 0;
 static const char *pop_table_name;

 void
 pop_insert (const pseudo_typeS *table)
 {
-  const char *errtxt;
+  void **slot;
   const pseudo_typeS *pop;
+  pseudo_typeS *found;
+
   for (pop = table; pop->poc_name; pop++)
     {
-      errtxt = hash_insert (po_hash, pop->poc_name, (char *) pop);
-      if (errtxt && (!pop_override_ok || strcmp (errtxt, "exists")))
-	as_fatal (_("error constructing %s pseudo-op table: %s"), pop_table_name,
-		  errtxt);
+      found = htab_find (po_hash, (const void *) pop);
+      if (found)
+	continue;
+
+      slot = htab_find_slot (po_hash, (const void *) pop, INSERT);
+      if (!slot)
+	as_fatal (_("error allocating %s in %s pseudo-op table"),
+		  pop->poc_name, pop_table_name);
+
+      *(const pseudo_typeS **) slot = pop;
     }
 }

@@ -489,10 +497,28 @@ pop_insert (const pseudo_typeS *table)
 #define cfi_pop_insert()	pop_insert(cfi_pseudo_table)
 #endif

+
+static hashval_t
+hash_pseudo (const PTR p)
+{
+  return htab_hash_string (((pseudo_typeS *) p)->poc_name);
+}
+
+/* Returns non-zero if P1 and P2 are equal.  */
+
+static int
+pseudo_equal_p (const PTR p1, const PTR p2)
+{
+  pseudo_typeS *pt1 = (pseudo_typeS *) p1;
+  pseudo_typeS *pt2 = (pseudo_typeS *) p2;
+  return (strcmp (pt1->poc_name, pt2->poc_name) == 0);
+}
+
 static void
 pobegin (void)
 {
-  po_hash = hash_new ();
+  po_hash = htab_create_alloc (50, hash_pseudo, pseudo_equal_p,
+			       NULL, calloc, free);

   /* Do the target-specific pseudo ops.  */
   pop_table_name = "md";
@@ -500,7 +526,6 @@ pobegin (void)

   /* Now object specific.  Skip any that were in the target table.  */
   pop_table_name = "obj";
-  pop_override_ok = 1;
   obj_pop_insert ();

   /* Now portable ones.  Skip any that we've seen already.  */
@@ -509,7 +534,6 @@ pobegin (void)

 #ifdef TARGET_USE_CFIPOP
   pop_table_name = "cfi";
-  pop_override_ok = 1;
   cfi_pop_insert ();
 #endif
 }
@@ -822,7 +846,9 @@ read_a_source_file (char *name)
 		    {
 		      /* The MRI assembler and the m88k use pseudo-ops
 			 without a period.  */
-		      pop = (pseudo_typeS *) hash_find (po_hash, s);
+		      pseudo_typeS ptype;
+		      ptype.poc_name = s;
+		      pop = (pseudo_typeS *) htab_find (po_hash, &ptype);
 		      if (pop != NULL && pop->poc_handler == NULL)
 			pop = NULL;
 		    }
@@ -836,8 +862,11 @@ read_a_source_file (char *name)
 			 We lookup the pseudo-op table with s+1 because we
 			 already know that the pseudo-op begins with a '.'.  */

+		      pseudo_typeS ptype;
+		      ptype.poc_name = s + 1;
+
 		      if (pop == NULL)
-			pop = (pseudo_typeS *) hash_find (po_hash, s + 1);
+			pop = (pseudo_typeS *) htab_find (po_hash, &ptype);
 		      if (pop && !pop->poc_handler)
 			pop = NULL;

@@ -2365,6 +2394,10 @@ s_macro (int ignore ATTRIBUTE_UNUSED)
     as_bad_where (file, line, err, name);
   else
     {
+      pseudo_typeS ptype, ptype1;
+      ptype.poc_name = name;
+      ptype1.poc_name = name + 1;
+
       if (line_label != NULL)
 	{
 	  S_SET_SEGMENT (line_label, absolute_section);
@@ -2373,10 +2406,10 @@ s_macro (int ignore ATTRIBUTE_UNUSED)
 	}

       if (((NO_PSEUDO_DOT || flag_m68k_mri)
-	   && hash_find (po_hash, name) != NULL)
+	   && htab_find (po_hash, &ptype) != NULL)
 	  || (!flag_m68k_mri
 	      && *name == '.'
-	      && hash_find (po_hash, name + 1) != NULL))
+	      && htab_find (po_hash, &ptype1) != NULL))
 	as_warn_where (file,
 		 line,
 		 _("attempt to redefine pseudo-op `%s' ignored"),
@@ -5296,12 +5329,6 @@ s_ignore (int arg ATTRIBUTE_UNUSED)
       ++input_line_pointer;
     }
   ++input_line_pointer;
-}
-
-void
-read_print_statistics (FILE *file)
-{
-  hash_print_statistics (file, "pseudo-op table", po_hash);
 }

 /* Inserts the given line into the input stream.

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 256 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-04-29  8:21 PATCH: use hashtab for pseudo op table Ben Elliston
@ 2005-04-29  8:52 ` Zack Weinberg
  2005-05-05 10:00   ` Nick Clifton
  0 siblings, 1 reply; 17+ messages in thread
From: Zack Weinberg @ 2005-04-29  8:52 UTC (permalink / raw)
  To: Ben Elliston; +Cc: binutils

I do not think replacing gas/hash.c with hashtab.c is a good idea, for
reasons laid out (somewhat cursorily) in
<http://sources.redhat.com/ml/binutils/2005-04/msg00056.html>.  I can
expand if anyone wants to hear it.

zw

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-04-29  8:52 ` Zack Weinberg
@ 2005-05-05 10:00   ` Nick Clifton
  2005-05-05 10:57     ` Alan Modra
  0 siblings, 1 reply; 17+ messages in thread
From: Nick Clifton @ 2005-05-05 10:00 UTC (permalink / raw)
  To: Zack Weinberg, Ben Elliston; +Cc: binutils

Hi Ben, Hi Zack,

 > Zack Weinberg wrote:
> I do not think replacing gas/hash.c with hashtab.c is a good idea, for
> reasons laid out (somewhat cursorily) in
> <http://sources.redhat.com/ml/binutils/2005-04/msg00056.html>.  I can
> expand if anyone wants to hear it.

I agree - Ben, what is your motivation for removing hash.c ?

I am partially concerned because there has been a recent surge of 
interest in improving the performance of gas, and it sounds like 
switching over to using the libiberty hashtab implementation is going to 
be a performance hit.

Cheers
   Nick

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 10:00   ` Nick Clifton
@ 2005-05-05 10:57     ` Alan Modra
  2005-05-05 11:46       ` Nick Clifton
  2005-05-05 15:24       ` H. J. Lu
  0 siblings, 2 replies; 17+ messages in thread
From: Alan Modra @ 2005-05-05 10:57 UTC (permalink / raw)
  To: Nick Clifton; +Cc: Zack Weinberg, Ben Elliston, binutils

On Thu, May 05, 2005 at 10:56:59AM +0100, Nick Clifton wrote:
> Hi Ben, Hi Zack,
> 
> > Zack Weinberg wrote:
> >I do not think replacing gas/hash.c with hashtab.c is a good idea, for
> >reasons laid out (somewhat cursorily) in
> ><http://sources.redhat.com/ml/binutils/2005-04/msg00056.html>.  I can
> >expand if anyone wants to hear it.
> 
> I agree - Ben, what is your motivation for removing hash.c ?

Probably because I was talking to Ben a week or so ago, and mentioned
that it's silly that we have so many hash table implementations.  We
have libiberty/hashtab.c, bfd/hash.c, and gas/hash.c.  Some of bfd
already uses libiberty/hashtab.c due to it's rather nice auto-resize,
and more of bfd should.  ie. I see libiberty/hashtab.c as the way of
the future.

-- 
Alan Modra
IBM OzLabs - Linux Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 10:57     ` Alan Modra
@ 2005-05-05 11:46       ` Nick Clifton
  2005-05-05 12:37         ` Alan Modra
                           ` (2 more replies)
  2005-05-05 15:24       ` H. J. Lu
  1 sibling, 3 replies; 17+ messages in thread
From: Nick Clifton @ 2005-05-05 11:46 UTC (permalink / raw)
  To: Alan Modra; +Cc: Zack Weinberg, Ben Elliston, binutils

Hi Alan,

>>I agree - Ben, what is your motivation for removing hash.c ?

> Probably because I was talking to Ben a week or so ago, and mentioned
> that it's silly that we have so many hash table implementations.  We
> have libiberty/hashtab.c, bfd/hash.c, and gas/hash.c.  Some of bfd
> already uses libiberty/hashtab.c due to it's rather nice auto-resize,
> and more of bfd should.  ie. I see libiberty/hashtab.c as the way of
> the future.

Well removing multiple implementations of the same thing is certainly good.

I guess the question comes down to - should bfd keep or re-implement its 
own hashtab code to be used everywhere in binutils or should the one 
from libiberty be used ?  Zack pointed out the performance penalty of 
using libiberty's code, and I would like to get a feel for whether this 
would be a serious hindrance to using it.

Cheers
   Nick

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 11:46       ` Nick Clifton
@ 2005-05-05 12:37         ` Alan Modra
  2005-05-05 17:14         ` Zack Weinberg
  2005-05-17 13:19         ` Nick Clifton
  2 siblings, 0 replies; 17+ messages in thread
From: Alan Modra @ 2005-05-05 12:37 UTC (permalink / raw)
  To: Nick Clifton; +Cc: Zack Weinberg, Ben Elliston, binutils

On Thu, May 05, 2005 at 12:02:02PM +0100, Nick Clifton wrote:
> Hi Alan,
> 
> >>I agree - Ben, what is your motivation for removing hash.c ?
> 
> >Probably because I was talking to Ben a week or so ago, and mentioned
> >that it's silly that we have so many hash table implementations.  We
> >have libiberty/hashtab.c, bfd/hash.c, and gas/hash.c.  Some of bfd
> >already uses libiberty/hashtab.c due to it's rather nice auto-resize,
> >and more of bfd should.  ie. I see libiberty/hashtab.c as the way of
> >the future.
> 
> Well removing multiple implementations of the same thing is certainly good.
> 
> I guess the question comes down to - should bfd keep or re-implement its 
> own hashtab code to be used everywhere in binutils or should the one 
> from libiberty be used ?  Zack pointed out the performance penalty of 
> using libiberty's code, and I would like to get a feel for whether this 
> would be a serious hindrance to using it.

I'm not convinced that the performance penaly Zack is worrying about is
real.  HJ mentioned recently that he found a 50% speedup in
sec_merge_hash_lookup by increasing the bfd hash table size.  Clearly
that was due to hash table collisions, and I'd guess that undersize hash
tables are hurting our performance on large ld jobs.  Auto-resizing to
help reduce collisions may well give a speed increase that eclipses the
penalty of extra function calls.  Of course, in the gas pseudo table
resizing isn't at all important, but then the speed of pseudo lookup in
gas isn't terribly important either.  I'd say we should focus on
improving the speed of ld on large jobs.

-- 
Alan Modra
IBM OzLabs - Linux Technology Centre

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 10:57     ` Alan Modra
  2005-05-05 11:46       ` Nick Clifton
@ 2005-05-05 15:24       ` H. J. Lu
  1 sibling, 0 replies; 17+ messages in thread
From: H. J. Lu @ 2005-05-05 15:24 UTC (permalink / raw)
  To: Nick Clifton, Zack Weinberg, Ben Elliston, binutils

On Thu, May 05, 2005 at 08:18:21PM +0930, Alan Modra wrote:
> On Thu, May 05, 2005 at 10:56:59AM +0100, Nick Clifton wrote:
> > Hi Ben, Hi Zack,
> > 
> > > Zack Weinberg wrote:
> > >I do not think replacing gas/hash.c with hashtab.c is a good idea, for
> > >reasons laid out (somewhat cursorily) in
> > ><http://sources.redhat.com/ml/binutils/2005-04/msg00056.html>.  I can
> > >expand if anyone wants to hear it.
> > 
> > I agree - Ben, what is your motivation for removing hash.c ?
> 
> Probably because I was talking to Ben a week or so ago, and mentioned
> that it's silly that we have so many hash table implementations.  We
> have libiberty/hashtab.c, bfd/hash.c, and gas/hash.c.  Some of bfd
> already uses libiberty/hashtab.c due to it's rather nice auto-resize,
> and more of bfd should.  ie. I see libiberty/hashtab.c as the way of
> the future.

I think we tried libiberty/hashtab.c in bfd before. It didn't work
due to string merge which requires bfd/hash.c.


H.J.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 11:46       ` Nick Clifton
  2005-05-05 12:37         ` Alan Modra
@ 2005-05-05 17:14         ` Zack Weinberg
  2005-05-05 17:58           ` Dave Korn
  2005-05-05 19:20           ` DJ Delorie
  2005-05-17 13:19         ` Nick Clifton
  2 siblings, 2 replies; 17+ messages in thread
From: Zack Weinberg @ 2005-05-05 17:14 UTC (permalink / raw)
  To: Nick Clifton; +Cc: Alan Modra, Ben Elliston, binutils

Nick Clifton <nickc@redhat.com> writes:
>
> I guess the question comes down to - should bfd keep or re-implement
> its own hashtab code to be used everywhere in binutils or should the
> one from libiberty be used ?  Zack pointed out the performance penalty
> of using libiberty's code, and I would like to get a feel for whether
> this would be a serious hindrance to using it.

The auto-resizing certainly would be nice to have.  I'm not just
bringing up the indirect function call penalty as a hypothetical,
though; over in GCC we got measurable speedups by switching back to a
custom hash table (libcpp/symtab.c) for identifier lookups.  I don't
know whether the assembler's parse pass bottlenecks in the same places
that GCC's parsers do, but it wouldn't surprise me -- parsers is
parsers.

I'm also concerned with hashtab.c's interface being significantly
messier.  Take this example from (my revision of) the ARM assembler
(with some irrelevant material left out):

  const struct asm_opcode *opcode;

  for (base = end = *str; *end != '\0'; end++)
    if (*end == ' ' || (unified_syntax && *end == '.'))
      break;

  opcode = hash_find_n (arm_ops_hsh, base, end - base);

If arm_ops_hsh were a libiberty hash table, I would instead have to
write

  const struct asm_opcode *opcode;
  struct asm_opcode key;
  char *namep;

  for (base = end = *str; *end != '\0'; end++)
    if (*end == ' ' || (unified_syntax && *end == '.'))
      break;

  namep = alloca (end - base + 1);
  memcpy (namep, base, end - base);
  namep[end - base] = 0;
  key.name = namep;

  opcode = htab_find (arm_ops_hsh, &key);

That's both less lucid and less efficient.  And it's not practical to
hide the mess in wrapper functions, because you have to get the type
of 'key' right ... gas/config/tc-arm.c has about a dozen little
structures stored in hash tables.

zw

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: PATCH: use hashtab for pseudo op table
  2005-05-05 17:14         ` Zack Weinberg
@ 2005-05-05 17:58           ` Dave Korn
  2005-05-05 18:59             ` Zack Weinberg
  2005-05-05 19:20           ` DJ Delorie
  1 sibling, 1 reply; 17+ messages in thread
From: Dave Korn @ 2005-05-05 17:58 UTC (permalink / raw)
  To: 'Zack Weinberg', 'Nick Clifton'
  Cc: 'Alan Modra', 'Ben Elliston', binutils

----Original Message----
>From: Zack Weinberg
>Sent: 05 May 2005 18:13

> The auto-resizing certainly would be nice to have.  I'm not just
> bringing up the indirect function call penalty as a hypothetical,
> though; over in GCC we got measurable speedups by switching back to a
> custom hash table (libcpp/symtab.c) for identifier lookups.

  You mentioned this before in the context of iterators, but surely passing
a function pointer into an iterator routine and getting that function called
once per hashtable item is no worse than having an iterator, calling a
function (not through a pointer admittedly) to advance it once per hashtable
item, and then processing the thing inline?  There's still one function call
per entry and one block of code that loops over all entries calling that
function.....

>  I don't
> know whether the assembler's parse pass bottlenecks in the same places
> that GCC's parsers do, but it wouldn't surprise me -- parsers is
> parsers.

  LOL, that is _soooooo_ not a valid conclusion!  The complexity, structure,
architecture, implementation and behaviour of a backtracking parser that can
make sense of C++ in no way compares to the simple minded "Every line begins
with an optional label, then has an opcode, then some operands, and might
end with a comment" style parsing of gas!  Parsers most definitely is not
just parsers: some of them is parsers, and some is just crude tokenizers and
string matchers, and some is absolute monsters!

> I'm also concerned with hashtab.c's interface being significantly
> messier. 

> That's both less lucid and less efficient.  And it's not practical to
> hide the mess in wrapper functions, because you have to get the type
> of 'key' right ... gas/config/tc-arm.c has about a dozen little
> structures stored in hash tables.

  So, perhaps a slight bit of work on the libiberty hashtab api, to offer a
few interfaces that take a) NUL-terminated strings b) fixed-size structs and
maybe even c) variable sized structs with parameters specifying an offset
and size within the struct where the sizeof the struct can be found, would
make you reconsider?

    cheers,
      DaveK
-- 
Can't think of a witty .sigline today....

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 17:58           ` Dave Korn
@ 2005-05-05 18:59             ` Zack Weinberg
  2005-05-05 19:44               ` DJ Delorie
  2005-05-05 20:10               ` Richard Henderson
  0 siblings, 2 replies; 17+ messages in thread
From: Zack Weinberg @ 2005-05-05 18:59 UTC (permalink / raw)
  To: Dave Korn
  Cc: 'Nick Clifton', 'Alan Modra',
	'Ben Elliston',
	binutils

"Dave Korn" <dave.korn@artimi.com> writes:

>> I'm not just bringing up the indirect function call penalty as a
>> hypothetical, though; over in GCC we got measurable speedups by
>> switching back to a custom hash table (libcpp/symtab.c) for
>> identifier lookups.
>
> You mentioned this before in the context of iterators, but surely
> passing a function pointer into an iterator routine and getting that
> function called once per hashtable item is no worse than having an
> iterator, calling a function (not through a pointer admittedly) to
> advance it once per hashtable item, and then processing the thing
> inline?  There's still one function call per entry and one block of
> code that loops over all entries calling that function.....

Um, I don't know how iterators got into it.  I'm concerned about the
constant-factor cost of individual lookups.  The lookup routine in
libiberty/hashtab.c does one indirect function call to compute the
hash value, plus at one indirect function call to compare the key to
each candidate found from the hash value.  The lookup routine in
gas/hash.c, by contrast, calculates the hash value inline, and makes
direct function calls to strncmp.

> The complexity, structure, architecture, implementation and
> behaviour of a backtracking parser that can make sense of C++ in no
> way compares to the simple minded "Every line begins with an
> optional label, then has an opcode, then some operands, and might
> end with a comment" style parsing of gas!

Clearly I should have explained my thinking in detail rather than
going for the cheap joke.  Let's try this again.

I haven't spent much, er, okay, *any* time profiling GAS.  I have,
however, spent lots of time profiling cc1 and cc1plus.  Both of them
have parsers which are more complicated than GAS's -- as you say, in
the C++ case, absurdly more complicated.  And yet, despite that, the
lookup function for the identifier hash is *invariably* in the top
three on a flat profile.  That being the case, I find it very likely
that the lookup function for the opcode hash is going to dominate
profiles of GAS performance.

A crude experiment would seem to confirm this - I created a source
file containing 250,000 occurrences of 'mov r0,r1' and compiled it
with a profiled ARM gas (from the tree containing my Thumb-2 work).
Here's the top five:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls   s/call   s/call  name
 11.47      1.13     1.13       85     0.01     0.02  do_scrub_chars
 11.17      2.23     1.10   751985     0.00     0.00  hash_lookup
  9.14      3.13     0.90        3     0.30     0.30  write_relocs
  8.53      3.97     0.84   250003     0.00     0.00  strncpy
  7.72      4.73     0.76        1     0.76     7.30  read_a_source_file

[The strncpy calls are all from read_a_source_file.]

> So, perhaps a slight bit of work on the libiberty hashtab api, to
> offer a few interfaces that take a) NUL-terminated strings b)
> fixed-size structs and maybe even c) variable sized structs with
> parameters specifying an offset and size within the struct where the
> sizeof the struct can be found, would make you reconsider?

I'd feel better about the interface, but I'd still be concerned about
the constant factors.  And I want support for not-NUL-terminated
strings with length passed in separately, too.

zw

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 17:14         ` Zack Weinberg
  2005-05-05 17:58           ` Dave Korn
@ 2005-05-05 19:20           ` DJ Delorie
  1 sibling, 0 replies; 17+ messages in thread
From: DJ Delorie @ 2005-05-05 19:20 UTC (permalink / raw)
  To: zack; +Cc: nickc, amodra, bje, binutils


> The auto-resizing certainly would be nice to have.

http://sourceware.org/ml/binutils/2004-06/msg00136.html

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 18:59             ` Zack Weinberg
@ 2005-05-05 19:44               ` DJ Delorie
  2005-05-05 20:10               ` Richard Henderson
  1 sibling, 0 replies; 17+ messages in thread
From: DJ Delorie @ 2005-05-05 19:44 UTC (permalink / raw)
  To: zack; +Cc: binutils

> I'd feel better about the interface, but I'd still be concerned
> about the constant factors.  And I want support for
> not-NUL-terminated strings with length passed in separately, too.

Perhaps we could hard-type the hash tables when we create them, and
have the hashtab internals "just know" about a few popular kinds of
hashes?  Then, when we create those types of hash tables, we can
optimize them better.

The default type would use callbacks, but specific types for
NUL-terminated strings, length/memory pairs, and whatnot could be
added too.  We just have to be careful not to break the existing API
when we change things.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 18:59             ` Zack Weinberg
  2005-05-05 19:44               ` DJ Delorie
@ 2005-05-05 20:10               ` Richard Henderson
  2005-05-06  6:17                 ` Zack Weinberg
  1 sibling, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2005-05-05 20:10 UTC (permalink / raw)
  To: Zack Weinberg
  Cc: Dave Korn, 'Nick Clifton', 'Alan Modra',
	'Ben Elliston',
	binutils

On Thu, May 05, 2005 at 11:58:06AM -0700, Zack Weinberg wrote:
> > So, perhaps a slight bit of work on the libiberty hashtab api, to
> > offer a few interfaces that take a) NUL-terminated strings b)
> > fixed-size structs and maybe even c) variable sized structs with
> > parameters specifying an offset and size within the struct where the
> > sizeof the struct can be found, would make you reconsider?
> 
> I'd feel better about the interface, but I'd still be concerned about
> the constant factors.  And I want support for not-NUL-terminated
> strings with length passed in separately, too.

The libiberty hash table doesn't need any changes to support this.
The comparison function is asymmetric.  It's therefore possible to
compare two completely different items.

For instance:

struct opcode
{
  const char *name;
  // other stuff
};

struct opcode_key
{
  const char *start;
  size_t len;
};

static int
eq_opcode_name (const void *xelt, const void *xkey)
{
  const struct opcode *elt = (const struct opcode *) xelt;
  const struct opcode_key *key = (const struct opcode_key *) xkey;

  if (strncmp (elt->name, key->start, key->len) == 0)
    return elt->name[key->len] == '\0';
  else
    return 0;
}

const struct opcode *
find_opcode (const char *start, size_t len)
{
  struct opcode_key key;
  hashval_t hash;

  key.start = name;
  key.len = len;
  hash = // something like htab_hash_string, but with length

  return htab_find_with_hash (htab, &key, hash);
}


That said, I agree that the overhead of the indirection in the libiberty
version will probably be noticable.


r~

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 20:10               ` Richard Henderson
@ 2005-05-06  6:17                 ` Zack Weinberg
  2005-05-06  6:44                   ` Richard Henderson
  0 siblings, 1 reply; 17+ messages in thread
From: Zack Weinberg @ 2005-05-06  6:17 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Dave Korn, 'Nick Clifton', 'Alan Modra',
	'Ben Elliston',
	binutils

Richard Henderson <rth@redhat.com> writes:

> On Thu, May 05, 2005 at 11:58:06AM -0700, Zack Weinberg wrote:
>> > So, perhaps a slight bit of work on the libiberty hashtab api, to
>> > offer a few interfaces that take a) NUL-terminated strings b)
>> > fixed-size structs and maybe even c) variable sized structs with
>> > parameters specifying an offset and size within the struct where the
>> > sizeof the struct can be found, would make you reconsider?
>> 
>> I'd feel better about the interface, but I'd still be concerned about
>> the constant factors.  And I want support for not-NUL-terminated
>> strings with length passed in separately, too.
>
> The libiberty hash table doesn't need any changes to support this.
> The comparison function is asymmetric.  It's therefore possible to
> compare two completely different items.

Won't horrible things happen when it tries to resize the table, if you
do that?

zw

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-06  6:17                 ` Zack Weinberg
@ 2005-05-06  6:44                   ` Richard Henderson
  2005-05-06  7:36                     ` Zack Weinberg
  0 siblings, 1 reply; 17+ messages in thread
From: Richard Henderson @ 2005-05-06  6:44 UTC (permalink / raw)
  To: Zack Weinberg
  Cc: Dave Korn, 'Nick Clifton', 'Alan Modra',
	'Ben Elliston',
	binutils

On Thu, May 05, 2005 at 11:16:19PM -0700, Zack Weinberg wrote:
> Won't horrible things happen when it tries to resize the table, if you
> do that?

No, there are two hash functions.  The one you give at hash table 
creation knows about the object stored in.  That's why you have to
call the "*_with_hash" function directly.

We do this in several places in gcc itself.


r~

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-06  6:44                   ` Richard Henderson
@ 2005-05-06  7:36                     ` Zack Weinberg
  0 siblings, 0 replies; 17+ messages in thread
From: Zack Weinberg @ 2005-05-06  7:36 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Dave Korn, 'Nick Clifton', 'Alan Modra',
	'Ben Elliston',
	binutils

Richard Henderson <rth@redhat.com> writes:

> On Thu, May 05, 2005 at 11:16:19PM -0700, Zack Weinberg wrote:
>> Won't horrible things happen when it tries to resize the table, if you
>> do that?
>
> No, there are two hash functions.  The one you give at hash table 
> creation knows about the object stored in.  That's why you have to
> call the "*_with_hash" function directly.

Oh, right.  That trick doesn't make me very happy, because someone's
bound to forget.  In the GAS context, though, it might be safe enough
if it were all hidden behind wrappers.

zw

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: PATCH: use hashtab for pseudo op table
  2005-05-05 11:46       ` Nick Clifton
  2005-05-05 12:37         ` Alan Modra
  2005-05-05 17:14         ` Zack Weinberg
@ 2005-05-17 13:19         ` Nick Clifton
  2 siblings, 0 replies; 17+ messages in thread
From: Nick Clifton @ 2005-05-17 13:19 UTC (permalink / raw)
  To: Ben Elliston; +Cc: binutils

Hi Ben,

   [Sorry about the delay in summarizing this]

   I think that the result of the discussion on this topic is that a 
patch to remove the hashing code in gas/hash.c would be acceptable if:

   * It switched gas over to using the code in bfd/hash.c or, even 
better, switched over both bfd and gas to using the code in libiberty.

and

   * It does not introduce any regressions.  (Duh :)

   * It does not introduce a performance penalty to the assembler or linker.

Ensuring this last item might take a bit of work in order to find a few 
big applications and time them, but I think that it would be worthwhile. 
  We already have working hash functions for the assembler and linker 
and I do not want to replace them with something that does not work or 
is less efficient.

Cheers
   Nick

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2005-05-17 13:14 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-04-29  8:21 PATCH: use hashtab for pseudo op table Ben Elliston
2005-04-29  8:52 ` Zack Weinberg
2005-05-05 10:00   ` Nick Clifton
2005-05-05 10:57     ` Alan Modra
2005-05-05 11:46       ` Nick Clifton
2005-05-05 12:37         ` Alan Modra
2005-05-05 17:14         ` Zack Weinberg
2005-05-05 17:58           ` Dave Korn
2005-05-05 18:59             ` Zack Weinberg
2005-05-05 19:44               ` DJ Delorie
2005-05-05 20:10               ` Richard Henderson
2005-05-06  6:17                 ` Zack Weinberg
2005-05-06  6:44                   ` Richard Henderson
2005-05-06  7:36                     ` Zack Weinberg
2005-05-05 19:20           ` DJ Delorie
2005-05-17 13:19         ` Nick Clifton
2005-05-05 15:24       ` H. J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).