public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Use xxHash hashing algorithm.
@ 2022-01-05  8:17 Martin Liška
  2022-03-31 11:29 ` Martin Liška
  2022-06-25 19:44 ` Mark Wielaard
  0 siblings, 2 replies; 13+ messages in thread
From: Martin Liška @ 2022-01-05  8:17 UTC (permalink / raw)
  To: dwz

The algorithm provides the following speed-up with -O2:

- 1/5: sysdig (60M)
dwz_O2                : 9.7
dwz_O2_xxhash         : 8.5 (87.7%)
- 2/5: rtags (58M)
dwz_O2                : 17.6
dwz_O2_xxhash         : 15.8 (89.5%)
- 3/5: libetonyek (91M)
dwz_O2                : 10.8
dwz_O2_xxhash         : 9.4 (86.7%)
- 4/5: krita (685M)
dwz_O2                : 96.0
dwz_O2_xxhash         : 85.6 (89.1%)
- 5/5: gcc (1.2G)
dwz_O2                : 58.6
dwz_O2_xxhash         : 54.1 (92.4%)
---
  Makefile  |   4 +-
  dwz.c     | 103 +++++++++++++++++++++++-----------------
  hashtab.c | 139 ------------------------------------------------------
  hashtab.h |   6 ++-
  4 files changed, 65 insertions(+), 187 deletions(-)

diff --git a/Makefile b/Makefile
index 9394ef4..89546c2 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ CFLAGS = -O2 -g
  DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
  CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
  CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
-CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
+CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
  override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
  
  prefix = /usr
@@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
  datarootdir = $(prefix)/share
  mandir = $(datarootdir)/man
  OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
-LIBS=-lelf
+LIBS=-lelf -lxxhash
  dwz: $(OBJECTS)
  	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
  args.o: native.o
diff --git a/dwz.c b/dwz.c
index a3b289f..cc68f4a 100644
--- a/dwz.c
+++ b/dwz.c
@@ -112,6 +112,21 @@
  # define NT_GNU_BUILD_ID 3
  #endif
  
+/* xxHash state object.  */
+static XXH64_state_t state;
+
+/* Clear xxHash state to zero.  */
+#define hash_init_state() XXH64_reset(&state, 0)
+
+/* Update hash STATE with VALUE.  */
+#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
+
+/* Update hash STATE with OBJECT that has a provided SIZE.  */
+#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
+
+/* Get digest once we are done with a state.  */
+#define hash_digest() XXH64_digest(&state)
+
  /* Print memory amount M (in kb) in both exact and human readable, like so:
     1382508 (1.3G).  */
  static void
@@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
  {
    unsigned int i;
  
-  t->hash = iterative_hash_object (t->tag, 0);
-  t->hash = iterative_hash_object (t->nattr, t->hash);
-  t->hash = iterative_hash_object (t->children, t->hash);
+  hash_init_state ();
+  hash_update_state (t->tag);
+  hash_update_state (t->nattr);
+  hash_update_state (t->children);
    for (i = 0; i < t->nattr; i++)
      {
-      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
-      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
+      hash_update_state (t->attr[i].attr);
+      hash_update_state (t->attr[i].form);
        if (t->attr[i].form == DW_FORM_implicit_const)
-	t->hash = iterative_hash_object (t->values[i], t->hash);
+	hash_update_state (t->values[i]);
      }
+  t->hash = hash_digest ();
  }
  
  /* Maximum number of attributes in a DIE.  */
@@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
  		  struct dw_file *cu_file = &cu->cu_files[value - 1];
  		  size_t file_len = strlen (cu_file->file);
  		  s = t->attr[i].attr;
-		  die->u.p1.die_hash
-		    = iterative_hash_object (s, die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->time,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->size,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash (cu_file->file, file_len + 1,
-				      die->u.p1.die_hash);
+		  hash_init_state ();
+		  hash_update_state (die->u.p1.die_hash);
+		  hash_update_state (s);
+		  hash_update_state (cu_file->time);
+		  hash_update_state (cu_file->size);
+		  hash_update_state_object (cu_file->file, file_len + 1);
  		  if (cu_file->dir)
-		    die->u.p1.die_hash
-		      = iterative_hash (cu_file->dir,
-					strlen (cu_file->dir) + 1,
-					die->u.p1.die_hash);
+		    {
+		      hash_update_state_object (cu_file->dir,
+						strlen (cu_file->dir) + 1);
+		    }
  		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
  		     etc. if immediately followed by DW_AT_*_line 0.  */
  		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
@@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
  				  ? DW_AT_decl_line : DW_AT_call_line)
  			   && t->attr[i + 1].form == DW_FORM_data1
  			   && *new_ptr == 0)
-		    break;
+		    {
+		      die->u.p1.die_hash = hash_digest ();
+		      break;
+		    }
+
+		  die->u.p1.die_hash = hash_digest ();
+
  		  if (cu->cu_comp_dir
  		      && (cu_file->dir ? cu_file->dir[0]
  				       : cu_file->file[0]) != '/')
@@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
  static hashval_t
  strp_hash3 (const void *p)
  {
-  return iterative_hash (p, strlen (p), 0);
+  return hash (p, strlen (p));
  }
  
  /* Corresponding equality function in strp_htab.  */
@@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
  
        p = debug_sections[DEBUG_STR].data + off;
        len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
        if (alt_strp_htab)
  	{
  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
@@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
  
        p = debug_sections[DEBUG_STR].data + off;
        len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
        if (alt_strp_htab)
  	{
  	  unsigned char *q = (unsigned char *)
@@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
  	{
  	  p = debug_sections[DEBUG_STR].data + off;
  	  len = strlen ((char *) p);
-	  hash = iterative_hash (p, len, 0);
+	  hash = hash (p, len);
  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
  	    return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
  	}
@@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
      return DW_FORM_strp;
    p = debug_sections[DEBUG_STR].data + off;
    q = (unsigned char *) strchr ((char *) p, '\0');
-  hash = iterative_hash (p, q - p, 0);
+  hash = hash (p, q - p);
    se.off = off;
    se.new_off = hash & ~1U;
    struct strp_entry *s = (struct strp_entry *)
@@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
  
  	memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
  	slot = htab_find_slot_with_hash (strp_htab, p,
-					 iterative_hash (p, len - 1, 0),
+					 hash (p, len - 1),
  					 INSERT);
  	if (slot == NULL)
  	  dwz_oom ();
@@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
  	      if (tail_offset_list != NULL)
  		tail_offset_list[k++] = arr[j]->new_off;
  	      slot = htab_find_slot_with_hash (strp_htab, q,
-					       iterative_hash (q, l - 1, 0),
+					       hash (q, l - 1),
  					       INSERT);
  	      if (slot == NULL)
  		dwz_oom ();
@@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
  {
    void **slot;
    struct line_entry le;
-  hashval_t h;
-
    if (id == 0)
      return 0;
    assert (id <= cu->cu_nfiles);
    le.file = &cu->cu_files[id - 1];
-  h = iterative_hash_object (le.file->time, 0);
-  h = iterative_hash_object (le.file->size, h);
-  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
+  hash_init_state ();
+  hash_update_state (le.file->time);
+  hash_update_state (le.file->size);
+  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
    if (le.file->dir)
-    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
+    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
    if (line_htab == NULL)
      {
        line_htab = htab_try_create (50, line_hash, line_eq, NULL);
@@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
  	dwz_oom ();
        max_line_id = 1;
      }
-  le.hash = h;
-  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
+  le.hash = hash_digest ();;
+  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
    if (slot == NULL)
      dwz_oom ();
    if (*slot == NULL)
      {
        struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
        l->file = le.file;
-      l->hash = h;
+      l->hash = le.hash;
        l->new_id = max_line_id++;
        *slot = (void *) l;
        return l->new_id;
@@ -10347,7 +10364,7 @@ handle_macro (void)
  		  /* This should only happen if there were multiple
  		     same transparent units within a single object file.  */
  		  && htab_find_with_hash (strp_htab, p,
-					  iterative_hash (p, len, 0)) == NULL)
+					  hash (p, len)) == NULL)
  		can_share = false;
  	      s = ptr;
  	      break;
@@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
  	  hashval_t hash;
  
  	  q = (unsigned char *) strchr ((char *) p, '\0');
-	  hash = iterative_hash (p, q - p, 0);
+	  hash = hash (p, q - p);
  	  se.off = p - debug_sections[DEBUG_STR].data;
  	  se.new_off = hash & ~1U;
  	  slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
@@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
  	    {
  	      q = (unsigned char *) strchr ((char *) p, '\0') + 1;
  	      slot = htab_find_slot_with_hash (strp_htab, p,
-					       iterative_hash (p, q - p - 1,
-							       0), INSERT);
+					       hash (p, q - p - 1), INSERT);
  	      if (slot == NULL)
  		dwz_oom ();
  	      assert (*slot == NULL);
@@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
  		  p = debug_sections[DEBUG_STR].data + *pi;
  		  q = (unsigned char *) strchr ((char *) p, '\0');
  		  slot = htab_find_slot_with_hash (strp_htab, p,
-						   iterative_hash (p, q - p,
-								   0), INSERT);
+						   hash (p, q - p), INSERT);
  		  if (slot == NULL)
  		    dwz_oom ();
  		  assert (*slot == NULL);
diff --git a/hashtab.c b/hashtab.c
index 41eab30..a9b9d13 100644
--- a/hashtab.c
+++ b/hashtab.c
@@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
    fclose (f);
  }
  #endif
-
-/* DERIVED FROM:
---------------------------------------------------------------------
-lookup2.c, by Bob Jenkins, December 1996, Public Domain.
-hash(), hash2(), hash3, and mix() are externally useful functions.
-Routines to test the hash are included if SELF_TEST is defined.
-You can use this free for any purpose.  It has no warranty.
---------------------------------------------------------------------
-*/
-
-/*
---------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-For every delta with one or two bit set, and the deltas of all three
-  high bits or all three low bits, whether the original value of a,b,c
-  is almost all zero or is uniformly distributed,
-* If mix() is run forward or backward, at least 32 bits in a,b,c
-  have at least 1/4 probability of changing.
-* If mix() is run forward, every bit of c will change between 1/3 and
-  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
-mix() was built out of 36 single-cycle latency instructions in a
-  structure that could supported 2x parallelism, like so:
-      a -= b;
-      a -= c; x = (c>>13);
-      b -= c; a ^= x;
-      b -= a; x = (a<<8);
-      c -= a; b ^= x;
-      c -= b; x = (b>>13);
-      ...
-  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
-  of that parallelism.  They've also turned some of those single-cycle
-  latency instructions into multi-cycle latency instructions.  Still,
-  this is the fastest good hash I could find.  There were about 2^^68
-  to choose from.  I only looked at a billion or so.
---------------------------------------------------------------------
-*/
-/* same, but slower, works on systems that might have 8 byte hashval_t's */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<< 8); \
-  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
-  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
-  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
-  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
-  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
-}
-
-/*
---------------------------------------------------------------------
-hash() -- hash a variable-length key into a 32-bit value
-  k     : the key (the unaligned variable-length array of bytes)
-  len   : the length of the key, counting by bytes
-  level : can be any 4-byte value
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Every 1-bit and 2-bit delta achieves avalanche.
-About 36+6len instructions.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (ub1 **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
-
-By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use for hash table lookup, or anything where one collision in 2^32 is
-acceptable.  Do NOT use for cryptographic purposes.
---------------------------------------------------------------------
-*/
-
-hashval_t
-iterative_hash (const void *k_in /* the key */,
-                register size_t  length /* the length of the key */,
-                register hashval_t initval /* the previous hash, or
-                                              an arbitrary value */)
-{
-  register const unsigned char *k = (const unsigned char *)k_in;
-  register hashval_t a,b,c,len;
-
-  /* Set up the internal state */
-  len = length;
-  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-  c = initval;           /* the previous hash value */
-
-  /*---------------------------------------- handle most of the key */
-#ifndef WORDS_BIGENDIAN
-  /* On a little-endian machine, if the data is 4-byte aligned we can hash
-     by word for better speed.  This gives nondeterministic results on
-     big-endian machines.  */
-  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
-    while (len >= 12)    /* aligned */
-      {
-	a += *(hashval_t *)(k+0);
-	b += *(hashval_t *)(k+4);
-	c += *(hashval_t *)(k+8);
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-  else /* unaligned */
-#endif
-    while (len >= 12)
-      {
-	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
-	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
-	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-
-  /*------------------------------------- handle the last 11 bytes */
-  c += length;
-  switch(len)              /* all the case statements fall through */
-    {
-    case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
-    case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
-    case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
-      /* the first byte of c is reserved for the length */
-    case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
-    case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
-    case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
-    case 5 : b+=k[4];			/* fall through */
-    case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
-    case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
-    case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
-    case 1 : a+=k[0];
-      /* case 0: nothing left to add */
-    }
-  mix(a,b,c);
-  /*-------------------------------------------- report the result */
-  return c;
-}
diff --git a/hashtab.h b/hashtab.h
index cb3da01..509e627 100644
--- a/hashtab.h
+++ b/hashtab.h
@@ -153,9 +153,11 @@ extern void	htab_restore (htab_t, const char *, htab_restorefn);
  
  #endif
  
-/* An iterative hash function for arbitrary data.  */
-extern hashval_t iterative_hash (const void *, size_t, hashval_t);
+#include <xxhash.h>
+
  /* Shorthand for hashing something with an intrinsic size.  */
+#define hash(IN,LEN) XXH64(IN, LEN, 0)
+#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
  #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
  
  #ifdef __cplusplus
-- 
2.34.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-07-11  7:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05  8:17 [PATCH] Use xxHash hashing algorithm Martin Liška
2022-03-31 11:29 ` Martin Liška
2022-04-28 10:06   ` Martin Liška
2022-06-20 13:29     ` Martin Liška
2022-06-25 19:44 ` Mark Wielaard
2022-06-27 13:41   ` Martin Liška
2022-06-30 16:13     ` Mark Wielaard
2022-07-04 13:31       ` Martin Liška
2022-07-04 15:40         ` Mark Wielaard
2022-07-07 12:29           ` Martin Liška
2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
2022-07-07 15:04               ` xxhash devel on buildbot workers Thomas Fitzsimmons
2022-07-11  7:48               ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Dan Horák

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).