public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Use xxHash hashing algorithm.
@ 2022-01-05  8:17 Martin Liška
  2022-03-31 11:29 ` Martin Liška
  2022-06-25 19:44 ` Mark Wielaard
  0 siblings, 2 replies; 13+ messages in thread
From: Martin Liška @ 2022-01-05  8:17 UTC (permalink / raw)
  To: dwz

The algorithm provides the following speed-up with -O2:

- 1/5: sysdig (60M)
dwz_O2                : 9.7
dwz_O2_xxhash         : 8.5 (87.7%)
- 2/5: rtags (58M)
dwz_O2                : 17.6
dwz_O2_xxhash         : 15.8 (89.5%)
- 3/5: libetonyek (91M)
dwz_O2                : 10.8
dwz_O2_xxhash         : 9.4 (86.7%)
- 4/5: krita (685M)
dwz_O2                : 96.0
dwz_O2_xxhash         : 85.6 (89.1%)
- 5/5: gcc (1.2G)
dwz_O2                : 58.6
dwz_O2_xxhash         : 54.1 (92.4%)
---
  Makefile  |   4 +-
  dwz.c     | 103 +++++++++++++++++++++++-----------------
  hashtab.c | 139 ------------------------------------------------------
  hashtab.h |   6 ++-
  4 files changed, 65 insertions(+), 187 deletions(-)

diff --git a/Makefile b/Makefile
index 9394ef4..89546c2 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ CFLAGS = -O2 -g
  DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
  CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
  CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
-CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
+CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
  override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
  
  prefix = /usr
@@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
  datarootdir = $(prefix)/share
  mandir = $(datarootdir)/man
  OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
-LIBS=-lelf
+LIBS=-lelf -lxxhash
  dwz: $(OBJECTS)
  	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
  args.o: native.o
diff --git a/dwz.c b/dwz.c
index a3b289f..cc68f4a 100644
--- a/dwz.c
+++ b/dwz.c
@@ -112,6 +112,21 @@
  # define NT_GNU_BUILD_ID 3
  #endif
  
+/* xxHash state object.  */
+static XXH64_state_t state;
+
+/* Clear xxHash state to zero.  */
+#define hash_init_state() XXH64_reset(&state, 0)
+
+/* Update hash STATE with VALUE.  */
+#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
+
+/* Update hash STATE with OBJECT that has a provided SIZE.  */
+#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
+
+/* Get digest once we are done with a state.  */
+#define hash_digest() XXH64_digest(&state)
+
  /* Print memory amount M (in kb) in both exact and human readable, like so:
     1382508 (1.3G).  */
  static void
@@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
  {
    unsigned int i;
  
-  t->hash = iterative_hash_object (t->tag, 0);
-  t->hash = iterative_hash_object (t->nattr, t->hash);
-  t->hash = iterative_hash_object (t->children, t->hash);
+  hash_init_state ();
+  hash_update_state (t->tag);
+  hash_update_state (t->nattr);
+  hash_update_state (t->children);
    for (i = 0; i < t->nattr; i++)
      {
-      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
-      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
+      hash_update_state (t->attr[i].attr);
+      hash_update_state (t->attr[i].form);
        if (t->attr[i].form == DW_FORM_implicit_const)
-	t->hash = iterative_hash_object (t->values[i], t->hash);
+	hash_update_state (t->values[i]);
      }
+  t->hash = hash_digest ();
  }
  
  /* Maximum number of attributes in a DIE.  */
@@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
  		  struct dw_file *cu_file = &cu->cu_files[value - 1];
  		  size_t file_len = strlen (cu_file->file);
  		  s = t->attr[i].attr;
-		  die->u.p1.die_hash
-		    = iterative_hash_object (s, die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->time,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->size,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash (cu_file->file, file_len + 1,
-				      die->u.p1.die_hash);
+		  hash_init_state ();
+		  hash_update_state (die->u.p1.die_hash);
+		  hash_update_state (s);
+		  hash_update_state (cu_file->time);
+		  hash_update_state (cu_file->size);
+		  hash_update_state_object (cu_file->file, file_len + 1);
  		  if (cu_file->dir)
-		    die->u.p1.die_hash
-		      = iterative_hash (cu_file->dir,
-					strlen (cu_file->dir) + 1,
-					die->u.p1.die_hash);
+		    {
+		      hash_update_state_object (cu_file->dir,
+						strlen (cu_file->dir) + 1);
+		    }
  		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
  		     etc. if immediately followed by DW_AT_*_line 0.  */
  		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
@@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
  				  ? DW_AT_decl_line : DW_AT_call_line)
  			   && t->attr[i + 1].form == DW_FORM_data1
  			   && *new_ptr == 0)
-		    break;
+		    {
+		      die->u.p1.die_hash = hash_digest ();
+		      break;
+		    }
+
+		  die->u.p1.die_hash = hash_digest ();
+
  		  if (cu->cu_comp_dir
  		      && (cu_file->dir ? cu_file->dir[0]
  				       : cu_file->file[0]) != '/')
@@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
  static hashval_t
  strp_hash3 (const void *p)
  {
-  return iterative_hash (p, strlen (p), 0);
+  return hash (p, strlen (p));
  }
  
  /* Corresponding equality function in strp_htab.  */
@@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
  
        p = debug_sections[DEBUG_STR].data + off;
        len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
        if (alt_strp_htab)
  	{
  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
@@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
  
        p = debug_sections[DEBUG_STR].data + off;
        len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
        if (alt_strp_htab)
  	{
  	  unsigned char *q = (unsigned char *)
@@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
  	{
  	  p = debug_sections[DEBUG_STR].data + off;
  	  len = strlen ((char *) p);
-	  hash = iterative_hash (p, len, 0);
+	  hash = hash (p, len);
  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
  	    return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
  	}
@@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
      return DW_FORM_strp;
    p = debug_sections[DEBUG_STR].data + off;
    q = (unsigned char *) strchr ((char *) p, '\0');
-  hash = iterative_hash (p, q - p, 0);
+  hash = hash (p, q - p);
    se.off = off;
    se.new_off = hash & ~1U;
    struct strp_entry *s = (struct strp_entry *)
@@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
  
  	memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
  	slot = htab_find_slot_with_hash (strp_htab, p,
-					 iterative_hash (p, len - 1, 0),
+					 hash (p, len - 1),
  					 INSERT);
  	if (slot == NULL)
  	  dwz_oom ();
@@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
  	      if (tail_offset_list != NULL)
  		tail_offset_list[k++] = arr[j]->new_off;
  	      slot = htab_find_slot_with_hash (strp_htab, q,
-					       iterative_hash (q, l - 1, 0),
+					       hash (q, l - 1),
  					       INSERT);
  	      if (slot == NULL)
  		dwz_oom ();
@@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
  {
    void **slot;
    struct line_entry le;
-  hashval_t h;
-
    if (id == 0)
      return 0;
    assert (id <= cu->cu_nfiles);
    le.file = &cu->cu_files[id - 1];
-  h = iterative_hash_object (le.file->time, 0);
-  h = iterative_hash_object (le.file->size, h);
-  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
+  hash_init_state ();
+  hash_update_state (le.file->time);
+  hash_update_state (le.file->size);
+  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
    if (le.file->dir)
-    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
+    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
    if (line_htab == NULL)
      {
        line_htab = htab_try_create (50, line_hash, line_eq, NULL);
@@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
  	dwz_oom ();
        max_line_id = 1;
      }
-  le.hash = h;
-  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
+  le.hash = hash_digest ();;
+  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
    if (slot == NULL)
      dwz_oom ();
    if (*slot == NULL)
      {
        struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
        l->file = le.file;
-      l->hash = h;
+      l->hash = le.hash;
        l->new_id = max_line_id++;
        *slot = (void *) l;
        return l->new_id;
@@ -10347,7 +10364,7 @@ handle_macro (void)
  		  /* This should only happen if there were multiple
  		     same transparent units within a single object file.  */
  		  && htab_find_with_hash (strp_htab, p,
-					  iterative_hash (p, len, 0)) == NULL)
+					  hash (p, len)) == NULL)
  		can_share = false;
  	      s = ptr;
  	      break;
@@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
  	  hashval_t hash;
  
  	  q = (unsigned char *) strchr ((char *) p, '\0');
-	  hash = iterative_hash (p, q - p, 0);
+	  hash = hash (p, q - p);
  	  se.off = p - debug_sections[DEBUG_STR].data;
  	  se.new_off = hash & ~1U;
  	  slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
@@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
  	    {
  	      q = (unsigned char *) strchr ((char *) p, '\0') + 1;
  	      slot = htab_find_slot_with_hash (strp_htab, p,
-					       iterative_hash (p, q - p - 1,
-							       0), INSERT);
+					       hash (p, q - p - 1), INSERT);
  	      if (slot == NULL)
  		dwz_oom ();
  	      assert (*slot == NULL);
@@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
  		  p = debug_sections[DEBUG_STR].data + *pi;
  		  q = (unsigned char *) strchr ((char *) p, '\0');
  		  slot = htab_find_slot_with_hash (strp_htab, p,
-						   iterative_hash (p, q - p,
-								   0), INSERT);
+						   hash (p, q - p), INSERT);
  		  if (slot == NULL)
  		    dwz_oom ();
  		  assert (*slot == NULL);
diff --git a/hashtab.c b/hashtab.c
index 41eab30..a9b9d13 100644
--- a/hashtab.c
+++ b/hashtab.c
@@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
    fclose (f);
  }
  #endif
-
-/* DERIVED FROM:
---------------------------------------------------------------------
-lookup2.c, by Bob Jenkins, December 1996, Public Domain.
-hash(), hash2(), hash3, and mix() are externally useful functions.
-Routines to test the hash are included if SELF_TEST is defined.
-You can use this free for any purpose.  It has no warranty.
---------------------------------------------------------------------
-*/
-
-/*
---------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-For every delta with one or two bit set, and the deltas of all three
-  high bits or all three low bits, whether the original value of a,b,c
-  is almost all zero or is uniformly distributed,
-* If mix() is run forward or backward, at least 32 bits in a,b,c
-  have at least 1/4 probability of changing.
-* If mix() is run forward, every bit of c will change between 1/3 and
-  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
-mix() was built out of 36 single-cycle latency instructions in a
-  structure that could supported 2x parallelism, like so:
-      a -= b;
-      a -= c; x = (c>>13);
-      b -= c; a ^= x;
-      b -= a; x = (a<<8);
-      c -= a; b ^= x;
-      c -= b; x = (b>>13);
-      ...
-  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
-  of that parallelism.  They've also turned some of those single-cycle
-  latency instructions into multi-cycle latency instructions.  Still,
-  this is the fastest good hash I could find.  There were about 2^^68
-  to choose from.  I only looked at a billion or so.
---------------------------------------------------------------------
-*/
-/* same, but slower, works on systems that might have 8 byte hashval_t's */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<< 8); \
-  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
-  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
-  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
-  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
-  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
-}
-
-/*
---------------------------------------------------------------------
-hash() -- hash a variable-length key into a 32-bit value
-  k     : the key (the unaligned variable-length array of bytes)
-  len   : the length of the key, counting by bytes
-  level : can be any 4-byte value
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Every 1-bit and 2-bit delta achieves avalanche.
-About 36+6len instructions.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (ub1 **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
-
-By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use for hash table lookup, or anything where one collision in 2^32 is
-acceptable.  Do NOT use for cryptographic purposes.
---------------------------------------------------------------------
-*/
-
-hashval_t
-iterative_hash (const void *k_in /* the key */,
-                register size_t  length /* the length of the key */,
-                register hashval_t initval /* the previous hash, or
-                                              an arbitrary value */)
-{
-  register const unsigned char *k = (const unsigned char *)k_in;
-  register hashval_t a,b,c,len;
-
-  /* Set up the internal state */
-  len = length;
-  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-  c = initval;           /* the previous hash value */
-
-  /*---------------------------------------- handle most of the key */
-#ifndef WORDS_BIGENDIAN
-  /* On a little-endian machine, if the data is 4-byte aligned we can hash
-     by word for better speed.  This gives nondeterministic results on
-     big-endian machines.  */
-  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
-    while (len >= 12)    /* aligned */
-      {
-	a += *(hashval_t *)(k+0);
-	b += *(hashval_t *)(k+4);
-	c += *(hashval_t *)(k+8);
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-  else /* unaligned */
-#endif
-    while (len >= 12)
-      {
-	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
-	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
-	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-
-  /*------------------------------------- handle the last 11 bytes */
-  c += length;
-  switch(len)              /* all the case statements fall through */
-    {
-    case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
-    case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
-    case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
-      /* the first byte of c is reserved for the length */
-    case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
-    case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
-    case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
-    case 5 : b+=k[4];			/* fall through */
-    case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
-    case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
-    case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
-    case 1 : a+=k[0];
-      /* case 0: nothing left to add */
-    }
-  mix(a,b,c);
-  /*-------------------------------------------- report the result */
-  return c;
-}
diff --git a/hashtab.h b/hashtab.h
index cb3da01..509e627 100644
--- a/hashtab.h
+++ b/hashtab.h
@@ -153,9 +153,11 @@ extern void	htab_restore (htab_t, const char *, htab_restorefn);
  
  #endif
  
-/* An iterative hash function for arbitrary data.  */
-extern hashval_t iterative_hash (const void *, size_t, hashval_t);
+#include <xxhash.h>
+
  /* Shorthand for hashing something with an intrinsic size.  */
+#define hash(IN,LEN) XXH64(IN, LEN, 0)
+#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
  #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
  
  #ifdef __cplusplus
-- 
2.34.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-01-05  8:17 [PATCH] Use xxHash hashing algorithm Martin Liška
@ 2022-03-31 11:29 ` Martin Liška
  2022-04-28 10:06   ` Martin Liška
  2022-06-25 19:44 ` Mark Wielaard
  1 sibling, 1 reply; 13+ messages in thread
From: Martin Liška @ 2022-03-31 11:29 UTC (permalink / raw)
  To: dwz


PING^1

On 1/5/22 09:17, Martin Liška wrote:
> The algorithm provides the following speed-up with -O2:
> 
> - 1/5: sysdig (60M)
> dwz_O2                : 9.7
> dwz_O2_xxhash         : 8.5 (87.7%)
> - 2/5: rtags (58M)
> dwz_O2                : 17.6
> dwz_O2_xxhash         : 15.8 (89.5%)
> - 3/5: libetonyek (91M)
> dwz_O2                : 10.8
> dwz_O2_xxhash         : 9.4 (86.7%)
> - 4/5: krita (685M)
> dwz_O2                : 96.0
> dwz_O2_xxhash         : 85.6 (89.1%)
> - 5/5: gcc (1.2G)
> dwz_O2                : 58.6
> dwz_O2_xxhash         : 54.1 (92.4%)
> ---
>   Makefile  |   4 +-
>   dwz.c     | 103 +++++++++++++++++++++++-----------------
>   hashtab.c | 139 ------------------------------------------------------
>   hashtab.h |   6 ++-
>   4 files changed, 65 insertions(+), 187 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index 9394ef4..89546c2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>   DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>   CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>   CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>   override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
> 
>   prefix = /usr
> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>   datarootdir = $(prefix)/share
>   mandir = $(datarootdir)/man
>   OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
> -LIBS=-lelf
> +LIBS=-lelf -lxxhash
>   dwz: $(OBJECTS)
>       $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>   args.o: native.o
> diff --git a/dwz.c b/dwz.c
> index a3b289f..cc68f4a 100644
> --- a/dwz.c
> +++ b/dwz.c
> @@ -112,6 +112,21 @@
>   # define NT_GNU_BUILD_ID 3
>   #endif
> 
> +/* xxHash state object.  */
> +static XXH64_state_t state;
> +
> +/* Clear xxHash state to zero.  */
> +#define hash_init_state() XXH64_reset(&state, 0)
> +
> +/* Update hash STATE with VALUE.  */
> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
> +
> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
> +
> +/* Get digest once we are done with a state.  */
> +#define hash_digest() XXH64_digest(&state)
> +
>   /* Print memory amount M (in kb) in both exact and human readable, like so:
>      1382508 (1.3G).  */
>   static void
> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>   {
>     unsigned int i;
> 
> -  t->hash = iterative_hash_object (t->tag, 0);
> -  t->hash = iterative_hash_object (t->nattr, t->hash);
> -  t->hash = iterative_hash_object (t->children, t->hash);
> +  hash_init_state ();
> +  hash_update_state (t->tag);
> +  hash_update_state (t->nattr);
> +  hash_update_state (t->children);
>     for (i = 0; i < t->nattr; i++)
>       {
> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
> +      hash_update_state (t->attr[i].attr);
> +      hash_update_state (t->attr[i].form);
>         if (t->attr[i].form == DW_FORM_implicit_const)
> -    t->hash = iterative_hash_object (t->values[i], t->hash);
> +    hash_update_state (t->values[i]);
>       }
> +  t->hash = hash_digest ();
>   }
> 
>   /* Maximum number of attributes in a DIE.  */
> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>             struct dw_file *cu_file = &cu->cu_files[value - 1];
>             size_t file_len = strlen (cu_file->file);
>             s = t->attr[i].attr;
> -          die->u.p1.die_hash
> -            = iterative_hash_object (s, die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash_object (cu_file->time,
> -                         die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash_object (cu_file->size,
> -                         die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash (cu_file->file, file_len + 1,
> -                      die->u.p1.die_hash);
> +          hash_init_state ();
> +          hash_update_state (die->u.p1.die_hash);
> +          hash_update_state (s);
> +          hash_update_state (cu_file->time);
> +          hash_update_state (cu_file->size);
> +          hash_update_state_object (cu_file->file, file_len + 1);
>             if (cu_file->dir)
> -            die->u.p1.die_hash
> -              = iterative_hash (cu_file->dir,
> -                    strlen (cu_file->dir) + 1,
> -                    die->u.p1.die_hash);
> +            {
> +              hash_update_state_object (cu_file->dir,
> +                        strlen (cu_file->dir) + 1);
> +            }
>             /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>                etc. if immediately followed by DW_AT_*_line 0.  */
>             else if (cu_file->file_angle_brackets_encapsulated_no_slash
> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>                     ? DW_AT_decl_line : DW_AT_call_line)
>                  && t->attr[i + 1].form == DW_FORM_data1
>                  && *new_ptr == 0)
> -            break;
> +            {
> +              die->u.p1.die_hash = hash_digest ();
> +              break;
> +            }
> +
> +          die->u.p1.die_hash = hash_digest ();
> +
>             if (cu->cu_comp_dir
>                 && (cu_file->dir ? cu_file->dir[0]
>                          : cu_file->file[0]) != '/')
> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>   static hashval_t
>   strp_hash3 (const void *p)
>   {
> -  return iterative_hash (p, strlen (p), 0);
> +  return hash (p, strlen (p));
>   }
> 
>   /* Corresponding equality function in strp_htab.  */
> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
> 
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (alt_strp_htab)
>       {
>         if (htab_find_with_hash (alt_strp_htab, p, hash))
> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
> 
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (alt_strp_htab)
>       {
>         unsigned char *q = (unsigned char *)
> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>       {
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>           return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>       }
> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>       return DW_FORM_strp;
>     p = debug_sections[DEBUG_STR].data + off;
>     q = (unsigned char *) strchr ((char *) p, '\0');
> -  hash = iterative_hash (p, q - p, 0);
> +  hash = hash (p, q - p);
>     se.off = off;
>     se.new_off = hash & ~1U;
>     struct strp_entry *s = (struct strp_entry *)
> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
> 
>       memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>       slot = htab_find_slot_with_hash (strp_htab, p,
> -                     iterative_hash (p, len - 1, 0),
> +                     hash (p, len - 1),
>                        INSERT);
>       if (slot == NULL)
>         dwz_oom ();
> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>             if (tail_offset_list != NULL)
>           tail_offset_list[k++] = arr[j]->new_off;
>             slot = htab_find_slot_with_hash (strp_htab, q,
> -                           iterative_hash (q, l - 1, 0),
> +                           hash (q, l - 1),
>                              INSERT);
>             if (slot == NULL)
>           dwz_oom ();
> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>   {
>     void **slot;
>     struct line_entry le;
> -  hashval_t h;
> -
>     if (id == 0)
>       return 0;
>     assert (id <= cu->cu_nfiles);
>     le.file = &cu->cu_files[id - 1];
> -  h = iterative_hash_object (le.file->time, 0);
> -  h = iterative_hash_object (le.file->size, h);
> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
> +  hash_init_state ();
> +  hash_update_state (le.file->time);
> +  hash_update_state (le.file->size);
> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>     if (le.file->dir)
> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>     if (line_htab == NULL)
>       {
>         line_htab = htab_try_create (50, line_hash, line_eq, NULL);
> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>       dwz_oom ();
>         max_line_id = 1;
>       }
> -  le.hash = h;
> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
> +  le.hash = hash_digest ();;
> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
>     if (slot == NULL)
>       dwz_oom ();
>     if (*slot == NULL)
>       {
>         struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>         l->file = le.file;
> -      l->hash = h;
> +      l->hash = le.hash;
>         l->new_id = max_line_id++;
>         *slot = (void *) l;
>         return l->new_id;
> @@ -10347,7 +10364,7 @@ handle_macro (void)
>             /* This should only happen if there were multiple
>                same transparent units within a single object file.  */
>             && htab_find_with_hash (strp_htab, p,
> -                      iterative_hash (p, len, 0)) == NULL)
> +                      hash (p, len)) == NULL)
>           can_share = false;
>             s = ptr;
>             break;
> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>         hashval_t hash;
> 
>         q = (unsigned char *) strchr ((char *) p, '\0');
> -      hash = iterative_hash (p, q - p, 0);
> +      hash = hash (p, q - p);
>         se.off = p - debug_sections[DEBUG_STR].data;
>         se.new_off = hash & ~1U;
>         slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>           {
>             q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>             slot = htab_find_slot_with_hash (strp_htab, p,
> -                           iterative_hash (p, q - p - 1,
> -                                   0), INSERT);
> +                           hash (p, q - p - 1), INSERT);
>             if (slot == NULL)
>           dwz_oom ();
>             assert (*slot == NULL);
> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>             p = debug_sections[DEBUG_STR].data + *pi;
>             q = (unsigned char *) strchr ((char *) p, '\0');
>             slot = htab_find_slot_with_hash (strp_htab, p,
> -                           iterative_hash (p, q - p,
> -                                   0), INSERT);
> +                           hash (p, q - p), INSERT);
>             if (slot == NULL)
>               dwz_oom ();
>             assert (*slot == NULL);
> diff --git a/hashtab.c b/hashtab.c
> index 41eab30..a9b9d13 100644
> --- a/hashtab.c
> +++ b/hashtab.c
> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>     fclose (f);
>   }
>   #endif
> -
> -/* DERIVED FROM:
> ---------------------------------------------------------------------
> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> -hash(), hash2(), hash3, and mix() are externally useful functions.
> -Routines to test the hash are included if SELF_TEST is defined.
> -You can use this free for any purpose.  It has no warranty.
> ---------------------------------------------------------------------
> -*/
> -
> -/*
> ---------------------------------------------------------------------
> -mix -- mix 3 32-bit values reversibly.
> -For every delta with one or two bit set, and the deltas of all three
> -  high bits or all three low bits, whether the original value of a,b,c
> -  is almost all zero or is uniformly distributed,
> -* If mix() is run forward or backward, at least 32 bits in a,b,c
> -  have at least 1/4 probability of changing.
> -* If mix() is run forward, every bit of c will change between 1/3 and
> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
> -mix() was built out of 36 single-cycle latency instructions in a
> -  structure that could supported 2x parallelism, like so:
> -      a -= b;
> -      a -= c; x = (c>>13);
> -      b -= c; a ^= x;
> -      b -= a; x = (a<<8);
> -      c -= a; b ^= x;
> -      c -= b; x = (b>>13);
> -      ...
> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
> -  of that parallelism.  They've also turned some of those single-cycle
> -  latency instructions into multi-cycle latency instructions.  Still,
> -  this is the fastest good hash I could find.  There were about 2^^68
> -  to choose from.  I only looked at a billion or so.
> ---------------------------------------------------------------------
> -*/
> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
> -#define mix(a,b,c) \
> -{ \
> -  a -= b; a -= c; a ^= (c>>13); \
> -  b -= c; b -= a; b ^= (a<< 8); \
> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
> -}
> -
> -/*
> ---------------------------------------------------------------------
> -hash() -- hash a variable-length key into a 32-bit value
> -  k     : the key (the unaligned variable-length array of bytes)
> -  len   : the length of the key, counting by bytes
> -  level : can be any 4-byte value
> -Returns a 32-bit value.  Every bit of the key affects every bit of
> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
> -About 36+6len instructions.
> -
> -The best hash table sizes are powers of 2.  There is no need to do
> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
> -use a bitmask.  For example, if you need only 10 bits, do
> -  h = (h & hashmask(10));
> -In which case, the hash table should have hashsize(10) elements.
> -
> -If you are hashing n strings (ub1 **)k, do it like this:
> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
> -
> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
> -code any way you wish, private, educational, or commercial.  It's free.
> -
> -See http://burtleburtle.net/bob/hash/evahash.html
> -Use for hash table lookup, or anything where one collision in 2^32 is
> -acceptable.  Do NOT use for cryptographic purposes.
> ---------------------------------------------------------------------
> -*/
> -
> -hashval_t
> -iterative_hash (const void *k_in /* the key */,
> -                register size_t  length /* the length of the key */,
> -                register hashval_t initval /* the previous hash, or
> -                                              an arbitrary value */)
> -{
> -  register const unsigned char *k = (const unsigned char *)k_in;
> -  register hashval_t a,b,c,len;
> -
> -  /* Set up the internal state */
> -  len = length;
> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
> -  c = initval;           /* the previous hash value */
> -
> -  /*---------------------------------------- handle most of the key */
> -#ifndef WORDS_BIGENDIAN
> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
> -     by word for better speed.  This gives nondeterministic results on
> -     big-endian machines.  */
> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
> -    while (len >= 12)    /* aligned */
> -      {
> -    a += *(hashval_t *)(k+0);
> -    b += *(hashval_t *)(k+4);
> -    c += *(hashval_t *)(k+8);
> -    mix(a,b,c);
> -    k += 12; len -= 12;
> -      }
> -  else /* unaligned */
> -#endif
> -    while (len >= 12)
> -      {
> -    a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
> -    b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
> -    c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
> -    mix(a,b,c);
> -    k += 12; len -= 12;
> -      }
> -
> -  /*------------------------------------- handle the last 11 bytes */
> -  c += length;
> -  switch(len)              /* all the case statements fall through */
> -    {
> -    case 11: c+=((hashval_t)k[10]<<24);    /* fall through */
> -    case 10: c+=((hashval_t)k[9]<<16);    /* fall through */
> -    case 9 : c+=((hashval_t)k[8]<<8);    /* fall through */
> -      /* the first byte of c is reserved for the length */
> -    case 8 : b+=((hashval_t)k[7]<<24);    /* fall through */
> -    case 7 : b+=((hashval_t)k[6]<<16);    /* fall through */
> -    case 6 : b+=((hashval_t)k[5]<<8);    /* fall through */
> -    case 5 : b+=k[4];            /* fall through */
> -    case 4 : a+=((hashval_t)k[3]<<24);    /* fall through */
> -    case 3 : a+=((hashval_t)k[2]<<16);    /* fall through */
> -    case 2 : a+=((hashval_t)k[1]<<8);    /* fall through */
> -    case 1 : a+=k[0];
> -      /* case 0: nothing left to add */
> -    }
> -  mix(a,b,c);
> -  /*-------------------------------------------- report the result */
> -  return c;
> -}
> diff --git a/hashtab.h b/hashtab.h
> index cb3da01..509e627 100644
> --- a/hashtab.h
> +++ b/hashtab.h
> @@ -153,9 +153,11 @@ extern void    htab_restore (htab_t, const char *, htab_restorefn);
> 
>   #endif
> 
> -/* An iterative hash function for arbitrary data.  */
> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
> +#include <xxhash.h>
> +
>   /* Shorthand for hashing something with an intrinsic size.  */
> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>   #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
> 
>   #ifdef __cplusplus


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-03-31 11:29 ` Martin Liška
@ 2022-04-28 10:06   ` Martin Liška
  2022-06-20 13:29     ` Martin Liška
  0 siblings, 1 reply; 13+ messages in thread
From: Martin Liška @ 2022-04-28 10:06 UTC (permalink / raw)
  To: dwz

PING^2

On 3/31/22 13:29, Martin Liška wrote:
> 
> PING^1
> 
> On 1/5/22 09:17, Martin Liška wrote:
>> The algorithm provides the following speed-up with -O2:
>>
>> - 1/5: sysdig (60M)
>> dwz_O2                : 9.7
>> dwz_O2_xxhash         : 8.5 (87.7%)
>> - 2/5: rtags (58M)
>> dwz_O2                : 17.6
>> dwz_O2_xxhash         : 15.8 (89.5%)
>> - 3/5: libetonyek (91M)
>> dwz_O2                : 10.8
>> dwz_O2_xxhash         : 9.4 (86.7%)
>> - 4/5: krita (685M)
>> dwz_O2                : 96.0
>> dwz_O2_xxhash         : 85.6 (89.1%)
>> - 5/5: gcc (1.2G)
>> dwz_O2                : 58.6
>> dwz_O2_xxhash         : 54.1 (92.4%)
>> ---
>>   Makefile  |   4 +-
>>   dwz.c     | 103 +++++++++++++++++++++++-----------------
>>   hashtab.c | 139 ------------------------------------------------------
>>   hashtab.h |   6 ++-
>>   4 files changed, 65 insertions(+), 187 deletions(-)
>>
>> diff --git a/Makefile b/Makefile
>> index 9394ef4..89546c2 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>>   DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>>   CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>>   CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
>> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
>> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>>   override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
>>
>>   prefix = /usr
>> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>>   datarootdir = $(prefix)/share
>>   mandir = $(datarootdir)/man
>>   OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
>> -LIBS=-lelf
>> +LIBS=-lelf -lxxhash
>>   dwz: $(OBJECTS)
>>       $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>>   args.o: native.o
>> diff --git a/dwz.c b/dwz.c
>> index a3b289f..cc68f4a 100644
>> --- a/dwz.c
>> +++ b/dwz.c
>> @@ -112,6 +112,21 @@
>>   # define NT_GNU_BUILD_ID 3
>>   #endif
>>
>> +/* xxHash state object.  */
>> +static XXH64_state_t state;
>> +
>> +/* Clear xxHash state to zero.  */
>> +#define hash_init_state() XXH64_reset(&state, 0)
>> +
>> +/* Update hash STATE with VALUE.  */
>> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
>> +
>> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
>> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
>> +
>> +/* Get digest once we are done with a state.  */
>> +#define hash_digest() XXH64_digest(&state)
>> +
>>   /* Print memory amount M (in kb) in both exact and human readable, like so:
>>      1382508 (1.3G).  */
>>   static void
>> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>>   {
>>     unsigned int i;
>>
>> -  t->hash = iterative_hash_object (t->tag, 0);
>> -  t->hash = iterative_hash_object (t->nattr, t->hash);
>> -  t->hash = iterative_hash_object (t->children, t->hash);
>> +  hash_init_state ();
>> +  hash_update_state (t->tag);
>> +  hash_update_state (t->nattr);
>> +  hash_update_state (t->children);
>>     for (i = 0; i < t->nattr; i++)
>>       {
>> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
>> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
>> +      hash_update_state (t->attr[i].attr);
>> +      hash_update_state (t->attr[i].form);
>>         if (t->attr[i].form == DW_FORM_implicit_const)
>> -    t->hash = iterative_hash_object (t->values[i], t->hash);
>> +    hash_update_state (t->values[i]);
>>       }
>> +  t->hash = hash_digest ();
>>   }
>>
>>   /* Maximum number of attributes in a DIE.  */
>> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>             struct dw_file *cu_file = &cu->cu_files[value - 1];
>>             size_t file_len = strlen (cu_file->file);
>>             s = t->attr[i].attr;
>> -          die->u.p1.die_hash
>> -            = iterative_hash_object (s, die->u.p1.die_hash);
>> -          die->u.p1.die_hash
>> -            = iterative_hash_object (cu_file->time,
>> -                         die->u.p1.die_hash);
>> -          die->u.p1.die_hash
>> -            = iterative_hash_object (cu_file->size,
>> -                         die->u.p1.die_hash);
>> -          die->u.p1.die_hash
>> -            = iterative_hash (cu_file->file, file_len + 1,
>> -                      die->u.p1.die_hash);
>> +          hash_init_state ();
>> +          hash_update_state (die->u.p1.die_hash);
>> +          hash_update_state (s);
>> +          hash_update_state (cu_file->time);
>> +          hash_update_state (cu_file->size);
>> +          hash_update_state_object (cu_file->file, file_len + 1);
>>             if (cu_file->dir)
>> -            die->u.p1.die_hash
>> -              = iterative_hash (cu_file->dir,
>> -                    strlen (cu_file->dir) + 1,
>> -                    die->u.p1.die_hash);
>> +            {
>> +              hash_update_state_object (cu_file->dir,
>> +                        strlen (cu_file->dir) + 1);
>> +            }
>>             /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>>                etc. if immediately followed by DW_AT_*_line 0.  */
>>             else if (cu_file->file_angle_brackets_encapsulated_no_slash
>> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>                     ? DW_AT_decl_line : DW_AT_call_line)
>>                  && t->attr[i + 1].form == DW_FORM_data1
>>                  && *new_ptr == 0)
>> -            break;
>> +            {
>> +              die->u.p1.die_hash = hash_digest ();
>> +              break;
>> +            }
>> +
>> +          die->u.p1.die_hash = hash_digest ();
>> +
>>             if (cu->cu_comp_dir
>>                 && (cu_file->dir ? cu_file->dir[0]
>>                          : cu_file->file[0]) != '/')
>> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>>   static hashval_t
>>   strp_hash3 (const void *p)
>>   {
>> -  return iterative_hash (p, strlen (p), 0);
>> +  return hash (p, strlen (p));
>>   }
>>
>>   /* Corresponding equality function in strp_htab.  */
>> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
>>
>>         p = debug_sections[DEBUG_STR].data + off;
>>         len = strlen ((char *) p);
>> -      hash = iterative_hash (p, len, 0);
>> +      hash = hash (p, len);
>>         if (alt_strp_htab)
>>       {
>>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
>>
>>         p = debug_sections[DEBUG_STR].data + off;
>>         len = strlen ((char *) p);
>> -      hash = iterative_hash (p, len, 0);
>> +      hash = hash (p, len);
>>         if (alt_strp_htab)
>>       {
>>         unsigned char *q = (unsigned char *)
>> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>>       {
>>         p = debug_sections[DEBUG_STR].data + off;
>>         len = strlen ((char *) p);
>> -      hash = iterative_hash (p, len, 0);
>> +      hash = hash (p, len);
>>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>>           return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>>       }
>> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>>       return DW_FORM_strp;
>>     p = debug_sections[DEBUG_STR].data + off;
>>     q = (unsigned char *) strchr ((char *) p, '\0');
>> -  hash = iterative_hash (p, q - p, 0);
>> +  hash = hash (p, q - p);
>>     se.off = off;
>>     se.new_off = hash & ~1U;
>>     struct strp_entry *s = (struct strp_entry *)
>> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
>>
>>       memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>>       slot = htab_find_slot_with_hash (strp_htab, p,
>> -                     iterative_hash (p, len - 1, 0),
>> +                     hash (p, len - 1),
>>                        INSERT);
>>       if (slot == NULL)
>>         dwz_oom ();
>> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>>             if (tail_offset_list != NULL)
>>           tail_offset_list[k++] = arr[j]->new_off;
>>             slot = htab_find_slot_with_hash (strp_htab, q,
>> -                           iterative_hash (q, l - 1, 0),
>> +                           hash (q, l - 1),
>>                              INSERT);
>>             if (slot == NULL)
>>           dwz_oom ();
>> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>   {
>>     void **slot;
>>     struct line_entry le;
>> -  hashval_t h;
>> -
>>     if (id == 0)
>>       return 0;
>>     assert (id <= cu->cu_nfiles);
>>     le.file = &cu->cu_files[id - 1];
>> -  h = iterative_hash_object (le.file->time, 0);
>> -  h = iterative_hash_object (le.file->size, h);
>> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
>> +  hash_init_state ();
>> +  hash_update_state (le.file->time);
>> +  hash_update_state (le.file->size);
>> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>>     if (le.file->dir)
>> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
>> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>>     if (line_htab == NULL)
>>       {
>>         line_htab = htab_try_create (50, line_hash, line_eq, NULL);
>> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>       dwz_oom ();
>>         max_line_id = 1;
>>       }
>> -  le.hash = h;
>> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
>> +  le.hash = hash_digest ();;
>> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
>>     if (slot == NULL)
>>       dwz_oom ();
>>     if (*slot == NULL)
>>       {
>>         struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>>         l->file = le.file;
>> -      l->hash = h;
>> +      l->hash = le.hash;
>>         l->new_id = max_line_id++;
>>         *slot = (void *) l;
>>         return l->new_id;
>> @@ -10347,7 +10364,7 @@ handle_macro (void)
>>             /* This should only happen if there were multiple
>>                same transparent units within a single object file.  */
>>             && htab_find_with_hash (strp_htab, p,
>> -                      iterative_hash (p, len, 0)) == NULL)
>> +                      hash (p, len)) == NULL)
>>           can_share = false;
>>             s = ptr;
>>             break;
>> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>>         hashval_t hash;
>>
>>         q = (unsigned char *) strchr ((char *) p, '\0');
>> -      hash = iterative_hash (p, q - p, 0);
>> +      hash = hash (p, q - p);
>>         se.off = p - debug_sections[DEBUG_STR].data;
>>         se.new_off = hash & ~1U;
>>         slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
>> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>>           {
>>             q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>>             slot = htab_find_slot_with_hash (strp_htab, p,
>> -                           iterative_hash (p, q - p - 1,
>> -                                   0), INSERT);
>> +                           hash (p, q - p - 1), INSERT);
>>             if (slot == NULL)
>>           dwz_oom ();
>>             assert (*slot == NULL);
>> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>>             p = debug_sections[DEBUG_STR].data + *pi;
>>             q = (unsigned char *) strchr ((char *) p, '\0');
>>             slot = htab_find_slot_with_hash (strp_htab, p,
>> -                           iterative_hash (p, q - p,
>> -                                   0), INSERT);
>> +                           hash (p, q - p), INSERT);
>>             if (slot == NULL)
>>               dwz_oom ();
>>             assert (*slot == NULL);
>> diff --git a/hashtab.c b/hashtab.c
>> index 41eab30..a9b9d13 100644
>> --- a/hashtab.c
>> +++ b/hashtab.c
>> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>>     fclose (f);
>>   }
>>   #endif
>> -
>> -/* DERIVED FROM:
>> ---------------------------------------------------------------------
>> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
>> -hash(), hash2(), hash3, and mix() are externally useful functions.
>> -Routines to test the hash are included if SELF_TEST is defined.
>> -You can use this free for any purpose.  It has no warranty.
>> ---------------------------------------------------------------------
>> -*/
>> -
>> -/*
>> ---------------------------------------------------------------------
>> -mix -- mix 3 32-bit values reversibly.
>> -For every delta with one or two bit set, and the deltas of all three
>> -  high bits or all three low bits, whether the original value of a,b,c
>> -  is almost all zero or is uniformly distributed,
>> -* If mix() is run forward or backward, at least 32 bits in a,b,c
>> -  have at least 1/4 probability of changing.
>> -* If mix() is run forward, every bit of c will change between 1/3 and
>> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
>> -mix() was built out of 36 single-cycle latency instructions in a
>> -  structure that could supported 2x parallelism, like so:
>> -      a -= b;
>> -      a -= c; x = (c>>13);
>> -      b -= c; a ^= x;
>> -      b -= a; x = (a<<8);
>> -      c -= a; b ^= x;
>> -      c -= b; x = (b>>13);
>> -      ...
>> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
>> -  of that parallelism.  They've also turned some of those single-cycle
>> -  latency instructions into multi-cycle latency instructions.  Still,
>> -  this is the fastest good hash I could find.  There were about 2^^68
>> -  to choose from.  I only looked at a billion or so.
>> ---------------------------------------------------------------------
>> -*/
>> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
>> -#define mix(a,b,c) \
>> -{ \
>> -  a -= b; a -= c; a ^= (c>>13); \
>> -  b -= c; b -= a; b ^= (a<< 8); \
>> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
>> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
>> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
>> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
>> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
>> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
>> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
>> -}
>> -
>> -/*
>> ---------------------------------------------------------------------
>> -hash() -- hash a variable-length key into a 32-bit value
>> -  k     : the key (the unaligned variable-length array of bytes)
>> -  len   : the length of the key, counting by bytes
>> -  level : can be any 4-byte value
>> -Returns a 32-bit value.  Every bit of the key affects every bit of
>> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
>> -About 36+6len instructions.
>> -
>> -The best hash table sizes are powers of 2.  There is no need to do
>> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
>> -use a bitmask.  For example, if you need only 10 bits, do
>> -  h = (h & hashmask(10));
>> -In which case, the hash table should have hashsize(10) elements.
>> -
>> -If you are hashing n strings (ub1 **)k, do it like this:
>> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
>> -
>> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
>> -code any way you wish, private, educational, or commercial.  It's free.
>> -
>> -See http://burtleburtle.net/bob/hash/evahash.html
>> -Use for hash table lookup, or anything where one collision in 2^32 is
>> -acceptable.  Do NOT use for cryptographic purposes.
>> ---------------------------------------------------------------------
>> -*/
>> -
>> -hashval_t
>> -iterative_hash (const void *k_in /* the key */,
>> -                register size_t  length /* the length of the key */,
>> -                register hashval_t initval /* the previous hash, or
>> -                                              an arbitrary value */)
>> -{
>> -  register const unsigned char *k = (const unsigned char *)k_in;
>> -  register hashval_t a,b,c,len;
>> -
>> -  /* Set up the internal state */
>> -  len = length;
>> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
>> -  c = initval;           /* the previous hash value */
>> -
>> -  /*---------------------------------------- handle most of the key */
>> -#ifndef WORDS_BIGENDIAN
>> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
>> -     by word for better speed.  This gives nondeterministic results on
>> -     big-endian machines.  */
>> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
>> -    while (len >= 12)    /* aligned */
>> -      {
>> -    a += *(hashval_t *)(k+0);
>> -    b += *(hashval_t *)(k+4);
>> -    c += *(hashval_t *)(k+8);
>> -    mix(a,b,c);
>> -    k += 12; len -= 12;
>> -      }
>> -  else /* unaligned */
>> -#endif
>> -    while (len >= 12)
>> -      {
>> -    a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
>> -    b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
>> -    c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
>> -    mix(a,b,c);
>> -    k += 12; len -= 12;
>> -      }
>> -
>> -  /*------------------------------------- handle the last 11 bytes */
>> -  c += length;
>> -  switch(len)              /* all the case statements fall through */
>> -    {
>> -    case 11: c+=((hashval_t)k[10]<<24);    /* fall through */
>> -    case 10: c+=((hashval_t)k[9]<<16);    /* fall through */
>> -    case 9 : c+=((hashval_t)k[8]<<8);    /* fall through */
>> -      /* the first byte of c is reserved for the length */
>> -    case 8 : b+=((hashval_t)k[7]<<24);    /* fall through */
>> -    case 7 : b+=((hashval_t)k[6]<<16);    /* fall through */
>> -    case 6 : b+=((hashval_t)k[5]<<8);    /* fall through */
>> -    case 5 : b+=k[4];            /* fall through */
>> -    case 4 : a+=((hashval_t)k[3]<<24);    /* fall through */
>> -    case 3 : a+=((hashval_t)k[2]<<16);    /* fall through */
>> -    case 2 : a+=((hashval_t)k[1]<<8);    /* fall through */
>> -    case 1 : a+=k[0];
>> -      /* case 0: nothing left to add */
>> -    }
>> -  mix(a,b,c);
>> -  /*-------------------------------------------- report the result */
>> -  return c;
>> -}
>> diff --git a/hashtab.h b/hashtab.h
>> index cb3da01..509e627 100644
>> --- a/hashtab.h
>> +++ b/hashtab.h
>> @@ -153,9 +153,11 @@ extern void    htab_restore (htab_t, const char *, htab_restorefn);
>>
>>   #endif
>>
>> -/* An iterative hash function for arbitrary data.  */
>> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
>> +#include <xxhash.h>
>> +
>>   /* Shorthand for hashing something with an intrinsic size.  */
>> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
>> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>>   #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
>>
>>   #ifdef __cplusplus
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-04-28 10:06   ` Martin Liška
@ 2022-06-20 13:29     ` Martin Liška
  0 siblings, 0 replies; 13+ messages in thread
From: Martin Liška @ 2022-06-20 13:29 UTC (permalink / raw)
  To: dwz; +Cc: Mark Wielaard

PING^3

On 4/28/22 12:06, Martin Liška wrote:
> PING^2
> 
> On 3/31/22 13:29, Martin Liška wrote:
>>
>> PING^1
>>
>> On 1/5/22 09:17, Martin Liška wrote:
>>> The algorithm provides the following speed-up with -O2:
>>>
>>> - 1/5: sysdig (60M)
>>> dwz_O2                : 9.7
>>> dwz_O2_xxhash         : 8.5 (87.7%)
>>> - 2/5: rtags (58M)
>>> dwz_O2                : 17.6
>>> dwz_O2_xxhash         : 15.8 (89.5%)
>>> - 3/5: libetonyek (91M)
>>> dwz_O2                : 10.8
>>> dwz_O2_xxhash         : 9.4 (86.7%)
>>> - 4/5: krita (685M)
>>> dwz_O2                : 96.0
>>> dwz_O2_xxhash         : 85.6 (89.1%)
>>> - 5/5: gcc (1.2G)
>>> dwz_O2                : 58.6
>>> dwz_O2_xxhash         : 54.1 (92.4%)
>>> ---
>>>   Makefile  |   4 +-
>>>   dwz.c     | 103 +++++++++++++++++++++++-----------------
>>>   hashtab.c | 139 ------------------------------------------------------
>>>   hashtab.h |   6 ++-
>>>   4 files changed, 65 insertions(+), 187 deletions(-)
>>>
>>> diff --git a/Makefile b/Makefile
>>> index 9394ef4..89546c2 100644
>>> --- a/Makefile
>>> +++ b/Makefile
>>> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>>>   DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>>>   CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>>>   CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
>>> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
>>> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>>>   override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
>>>
>>>   prefix = /usr
>>> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>>>   datarootdir = $(prefix)/share
>>>   mandir = $(datarootdir)/man
>>>   OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
>>> -LIBS=-lelf
>>> +LIBS=-lelf -lxxhash
>>>   dwz: $(OBJECTS)
>>>       $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>>>   args.o: native.o
>>> diff --git a/dwz.c b/dwz.c
>>> index a3b289f..cc68f4a 100644
>>> --- a/dwz.c
>>> +++ b/dwz.c
>>> @@ -112,6 +112,21 @@
>>>   # define NT_GNU_BUILD_ID 3
>>>   #endif
>>>
>>> +/* xxHash state object.  */
>>> +static XXH64_state_t state;
>>> +
>>> +/* Clear xxHash state to zero.  */
>>> +#define hash_init_state() XXH64_reset(&state, 0)
>>> +
>>> +/* Update hash STATE with VALUE.  */
>>> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
>>> +
>>> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
>>> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
>>> +
>>> +/* Get digest once we are done with a state.  */
>>> +#define hash_digest() XXH64_digest(&state)
>>> +
>>>   /* Print memory amount M (in kb) in both exact and human readable, like so:
>>>      1382508 (1.3G).  */
>>>   static void
>>> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>>>   {
>>>     unsigned int i;
>>>
>>> -  t->hash = iterative_hash_object (t->tag, 0);
>>> -  t->hash = iterative_hash_object (t->nattr, t->hash);
>>> -  t->hash = iterative_hash_object (t->children, t->hash);
>>> +  hash_init_state ();
>>> +  hash_update_state (t->tag);
>>> +  hash_update_state (t->nattr);
>>> +  hash_update_state (t->children);
>>>     for (i = 0; i < t->nattr; i++)
>>>       {
>>> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
>>> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
>>> +      hash_update_state (t->attr[i].attr);
>>> +      hash_update_state (t->attr[i].form);
>>>         if (t->attr[i].form == DW_FORM_implicit_const)
>>> -    t->hash = iterative_hash_object (t->values[i], t->hash);
>>> +    hash_update_state (t->values[i]);
>>>       }
>>> +  t->hash = hash_digest ();
>>>   }
>>>
>>>   /* Maximum number of attributes in a DIE.  */
>>> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>>             struct dw_file *cu_file = &cu->cu_files[value - 1];
>>>             size_t file_len = strlen (cu_file->file);
>>>             s = t->attr[i].attr;
>>> -          die->u.p1.die_hash
>>> -            = iterative_hash_object (s, die->u.p1.die_hash);
>>> -          die->u.p1.die_hash
>>> -            = iterative_hash_object (cu_file->time,
>>> -                         die->u.p1.die_hash);
>>> -          die->u.p1.die_hash
>>> -            = iterative_hash_object (cu_file->size,
>>> -                         die->u.p1.die_hash);
>>> -          die->u.p1.die_hash
>>> -            = iterative_hash (cu_file->file, file_len + 1,
>>> -                      die->u.p1.die_hash);
>>> +          hash_init_state ();
>>> +          hash_update_state (die->u.p1.die_hash);
>>> +          hash_update_state (s);
>>> +          hash_update_state (cu_file->time);
>>> +          hash_update_state (cu_file->size);
>>> +          hash_update_state_object (cu_file->file, file_len + 1);
>>>             if (cu_file->dir)
>>> -            die->u.p1.die_hash
>>> -              = iterative_hash (cu_file->dir,
>>> -                    strlen (cu_file->dir) + 1,
>>> -                    die->u.p1.die_hash);
>>> +            {
>>> +              hash_update_state_object (cu_file->dir,
>>> +                        strlen (cu_file->dir) + 1);
>>> +            }
>>>             /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>>>                etc. if immediately followed by DW_AT_*_line 0.  */
>>>             else if (cu_file->file_angle_brackets_encapsulated_no_slash
>>> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>>                     ? DW_AT_decl_line : DW_AT_call_line)
>>>                  && t->attr[i + 1].form == DW_FORM_data1
>>>                  && *new_ptr == 0)
>>> -            break;
>>> +            {
>>> +              die->u.p1.die_hash = hash_digest ();
>>> +              break;
>>> +            }
>>> +
>>> +          die->u.p1.die_hash = hash_digest ();
>>> +
>>>             if (cu->cu_comp_dir
>>>                 && (cu_file->dir ? cu_file->dir[0]
>>>                          : cu_file->file[0]) != '/')
>>> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>>>   static hashval_t
>>>   strp_hash3 (const void *p)
>>>   {
>>> -  return iterative_hash (p, strlen (p), 0);
>>> +  return hash (p, strlen (p));
>>>   }
>>>
>>>   /* Corresponding equality function in strp_htab.  */
>>> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
>>>
>>>         p = debug_sections[DEBUG_STR].data + off;
>>>         len = strlen ((char *) p);
>>> -      hash = iterative_hash (p, len, 0);
>>> +      hash = hash (p, len);
>>>         if (alt_strp_htab)
>>>       {
>>>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>>> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
>>>
>>>         p = debug_sections[DEBUG_STR].data + off;
>>>         len = strlen ((char *) p);
>>> -      hash = iterative_hash (p, len, 0);
>>> +      hash = hash (p, len);
>>>         if (alt_strp_htab)
>>>       {
>>>         unsigned char *q = (unsigned char *)
>>> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>>>       {
>>>         p = debug_sections[DEBUG_STR].data + off;
>>>         len = strlen ((char *) p);
>>> -      hash = iterative_hash (p, len, 0);
>>> +      hash = hash (p, len);
>>>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>>>           return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>>>       }
>>> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>>>       return DW_FORM_strp;
>>>     p = debug_sections[DEBUG_STR].data + off;
>>>     q = (unsigned char *) strchr ((char *) p, '\0');
>>> -  hash = iterative_hash (p, q - p, 0);
>>> +  hash = hash (p, q - p);
>>>     se.off = off;
>>>     se.new_off = hash & ~1U;
>>>     struct strp_entry *s = (struct strp_entry *)
>>> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
>>>
>>>       memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>>>       slot = htab_find_slot_with_hash (strp_htab, p,
>>> -                     iterative_hash (p, len - 1, 0),
>>> +                     hash (p, len - 1),
>>>                        INSERT);
>>>       if (slot == NULL)
>>>         dwz_oom ();
>>> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>>>             if (tail_offset_list != NULL)
>>>           tail_offset_list[k++] = arr[j]->new_off;
>>>             slot = htab_find_slot_with_hash (strp_htab, q,
>>> -                           iterative_hash (q, l - 1, 0),
>>> +                           hash (q, l - 1),
>>>                              INSERT);
>>>             if (slot == NULL)
>>>           dwz_oom ();
>>> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>>   {
>>>     void **slot;
>>>     struct line_entry le;
>>> -  hashval_t h;
>>> -
>>>     if (id == 0)
>>>       return 0;
>>>     assert (id <= cu->cu_nfiles);
>>>     le.file = &cu->cu_files[id - 1];
>>> -  h = iterative_hash_object (le.file->time, 0);
>>> -  h = iterative_hash_object (le.file->size, h);
>>> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
>>> +  hash_init_state ();
>>> +  hash_update_state (le.file->time);
>>> +  hash_update_state (le.file->size);
>>> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>>>     if (le.file->dir)
>>> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
>>> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>>>     if (line_htab == NULL)
>>>       {
>>>         line_htab = htab_try_create (50, line_hash, line_eq, NULL);
>>> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>>       dwz_oom ();
>>>         max_line_id = 1;
>>>       }
>>> -  le.hash = h;
>>> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
>>> +  le.hash = hash_digest ();;
>>> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
>>>     if (slot == NULL)
>>>       dwz_oom ();
>>>     if (*slot == NULL)
>>>       {
>>>         struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>>>         l->file = le.file;
>>> -      l->hash = h;
>>> +      l->hash = le.hash;
>>>         l->new_id = max_line_id++;
>>>         *slot = (void *) l;
>>>         return l->new_id;
>>> @@ -10347,7 +10364,7 @@ handle_macro (void)
>>>             /* This should only happen if there were multiple
>>>                same transparent units within a single object file.  */
>>>             && htab_find_with_hash (strp_htab, p,
>>> -                      iterative_hash (p, len, 0)) == NULL)
>>> +                      hash (p, len)) == NULL)
>>>           can_share = false;
>>>             s = ptr;
>>>             break;
>>> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>>>         hashval_t hash;
>>>
>>>         q = (unsigned char *) strchr ((char *) p, '\0');
>>> -      hash = iterative_hash (p, q - p, 0);
>>> +      hash = hash (p, q - p);
>>>         se.off = p - debug_sections[DEBUG_STR].data;
>>>         se.new_off = hash & ~1U;
>>>         slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
>>> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>>>           {
>>>             q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>>>             slot = htab_find_slot_with_hash (strp_htab, p,
>>> -                           iterative_hash (p, q - p - 1,
>>> -                                   0), INSERT);
>>> +                           hash (p, q - p - 1), INSERT);
>>>             if (slot == NULL)
>>>           dwz_oom ();
>>>             assert (*slot == NULL);
>>> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>>>             p = debug_sections[DEBUG_STR].data + *pi;
>>>             q = (unsigned char *) strchr ((char *) p, '\0');
>>>             slot = htab_find_slot_with_hash (strp_htab, p,
>>> -                           iterative_hash (p, q - p,
>>> -                                   0), INSERT);
>>> +                           hash (p, q - p), INSERT);
>>>             if (slot == NULL)
>>>               dwz_oom ();
>>>             assert (*slot == NULL);
>>> diff --git a/hashtab.c b/hashtab.c
>>> index 41eab30..a9b9d13 100644
>>> --- a/hashtab.c
>>> +++ b/hashtab.c
>>> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>>>     fclose (f);
>>>   }
>>>   #endif
>>> -
>>> -/* DERIVED FROM:
>>> ---------------------------------------------------------------------
>>> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
>>> -hash(), hash2(), hash3, and mix() are externally useful functions.
>>> -Routines to test the hash are included if SELF_TEST is defined.
>>> -You can use this free for any purpose.  It has no warranty.
>>> ---------------------------------------------------------------------
>>> -*/
>>> -
>>> -/*
>>> ---------------------------------------------------------------------
>>> -mix -- mix 3 32-bit values reversibly.
>>> -For every delta with one or two bit set, and the deltas of all three
>>> -  high bits or all three low bits, whether the original value of a,b,c
>>> -  is almost all zero or is uniformly distributed,
>>> -* If mix() is run forward or backward, at least 32 bits in a,b,c
>>> -  have at least 1/4 probability of changing.
>>> -* If mix() is run forward, every bit of c will change between 1/3 and
>>> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
>>> -mix() was built out of 36 single-cycle latency instructions in a
>>> -  structure that could supported 2x parallelism, like so:
>>> -      a -= b;
>>> -      a -= c; x = (c>>13);
>>> -      b -= c; a ^= x;
>>> -      b -= a; x = (a<<8);
>>> -      c -= a; b ^= x;
>>> -      c -= b; x = (b>>13);
>>> -      ...
>>> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
>>> -  of that parallelism.  They've also turned some of those single-cycle
>>> -  latency instructions into multi-cycle latency instructions.  Still,
>>> -  this is the fastest good hash I could find.  There were about 2^^68
>>> -  to choose from.  I only looked at a billion or so.
>>> ---------------------------------------------------------------------
>>> -*/
>>> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
>>> -#define mix(a,b,c) \
>>> -{ \
>>> -  a -= b; a -= c; a ^= (c>>13); \
>>> -  b -= c; b -= a; b ^= (a<< 8); \
>>> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
>>> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
>>> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
>>> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
>>> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
>>> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
>>> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
>>> -}
>>> -
>>> -/*
>>> ---------------------------------------------------------------------
>>> -hash() -- hash a variable-length key into a 32-bit value
>>> -  k     : the key (the unaligned variable-length array of bytes)
>>> -  len   : the length of the key, counting by bytes
>>> -  level : can be any 4-byte value
>>> -Returns a 32-bit value.  Every bit of the key affects every bit of
>>> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
>>> -About 36+6len instructions.
>>> -
>>> -The best hash table sizes are powers of 2.  There is no need to do
>>> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
>>> -use a bitmask.  For example, if you need only 10 bits, do
>>> -  h = (h & hashmask(10));
>>> -In which case, the hash table should have hashsize(10) elements.
>>> -
>>> -If you are hashing n strings (ub1 **)k, do it like this:
>>> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
>>> -
>>> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
>>> -code any way you wish, private, educational, or commercial.  It's free.
>>> -
>>> -See http://burtleburtle.net/bob/hash/evahash.html
>>> -Use for hash table lookup, or anything where one collision in 2^32 is
>>> -acceptable.  Do NOT use for cryptographic purposes.
>>> ---------------------------------------------------------------------
>>> -*/
>>> -
>>> -hashval_t
>>> -iterative_hash (const void *k_in /* the key */,
>>> -                register size_t  length /* the length of the key */,
>>> -                register hashval_t initval /* the previous hash, or
>>> -                                              an arbitrary value */)
>>> -{
>>> -  register const unsigned char *k = (const unsigned char *)k_in;
>>> -  register hashval_t a,b,c,len;
>>> -
>>> -  /* Set up the internal state */
>>> -  len = length;
>>> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
>>> -  c = initval;           /* the previous hash value */
>>> -
>>> -  /*---------------------------------------- handle most of the key */
>>> -#ifndef WORDS_BIGENDIAN
>>> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
>>> -     by word for better speed.  This gives nondeterministic results on
>>> -     big-endian machines.  */
>>> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
>>> -    while (len >= 12)    /* aligned */
>>> -      {
>>> -    a += *(hashval_t *)(k+0);
>>> -    b += *(hashval_t *)(k+4);
>>> -    c += *(hashval_t *)(k+8);
>>> -    mix(a,b,c);
>>> -    k += 12; len -= 12;
>>> -      }
>>> -  else /* unaligned */
>>> -#endif
>>> -    while (len >= 12)
>>> -      {
>>> -    a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
>>> -    b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
>>> -    c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
>>> -    mix(a,b,c);
>>> -    k += 12; len -= 12;
>>> -      }
>>> -
>>> -  /*------------------------------------- handle the last 11 bytes */
>>> -  c += length;
>>> -  switch(len)              /* all the case statements fall through */
>>> -    {
>>> -    case 11: c+=((hashval_t)k[10]<<24);    /* fall through */
>>> -    case 10: c+=((hashval_t)k[9]<<16);    /* fall through */
>>> -    case 9 : c+=((hashval_t)k[8]<<8);    /* fall through */
>>> -      /* the first byte of c is reserved for the length */
>>> -    case 8 : b+=((hashval_t)k[7]<<24);    /* fall through */
>>> -    case 7 : b+=((hashval_t)k[6]<<16);    /* fall through */
>>> -    case 6 : b+=((hashval_t)k[5]<<8);    /* fall through */
>>> -    case 5 : b+=k[4];            /* fall through */
>>> -    case 4 : a+=((hashval_t)k[3]<<24);    /* fall through */
>>> -    case 3 : a+=((hashval_t)k[2]<<16);    /* fall through */
>>> -    case 2 : a+=((hashval_t)k[1]<<8);    /* fall through */
>>> -    case 1 : a+=k[0];
>>> -      /* case 0: nothing left to add */
>>> -    }
>>> -  mix(a,b,c);
>>> -  /*-------------------------------------------- report the result */
>>> -  return c;
>>> -}
>>> diff --git a/hashtab.h b/hashtab.h
>>> index cb3da01..509e627 100644
>>> --- a/hashtab.h
>>> +++ b/hashtab.h
>>> @@ -153,9 +153,11 @@ extern void    htab_restore (htab_t, const char *, htab_restorefn);
>>>
>>>   #endif
>>>
>>> -/* An iterative hash function for arbitrary data.  */
>>> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
>>> +#include <xxhash.h>
>>> +
>>>   /* Shorthand for hashing something with an intrinsic size.  */
>>> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
>>> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>>>   #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
>>>
>>>   #ifdef __cplusplus
>>
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-01-05  8:17 [PATCH] Use xxHash hashing algorithm Martin Liška
  2022-03-31 11:29 ` Martin Liška
@ 2022-06-25 19:44 ` Mark Wielaard
  2022-06-27 13:41   ` Martin Liška
  1 sibling, 1 reply; 13+ messages in thread
From: Mark Wielaard @ 2022-06-25 19:44 UTC (permalink / raw)
  To: Martin Liška; +Cc: dwz

Hi Martin,

Could you resent this patch with git send-email or rebase it against
current master. As is it doesn't apply giving errors when trying to
use it with git am.

BTW. One of the reasons that I haven't reviewed things before is
because I am getting one or more testsuite FAILs with newer Fedora
versions. Which makes checking for regressions somewhat hard.

On Wed, Jan 05, 2022 at 09:17:37AM +0100, Martin Liška wrote:
> The algorithm provides the following speed-up with -O2:
> 
> - 1/5: sysdig (60M)
> dwz_O2                : 9.7
> dwz_O2_xxhash         : 8.5 (87.7%)
> - 2/5: rtags (58M)
> dwz_O2                : 17.6
> dwz_O2_xxhash         : 15.8 (89.5%)
> - 3/5: libetonyek (91M)
> dwz_O2                : 10.8
> dwz_O2_xxhash         : 9.4 (86.7%)
> - 4/5: krita (685M)
> dwz_O2                : 96.0
> dwz_O2_xxhash         : 85.6 (89.1%)
> - 5/5: gcc (1.2G)
> dwz_O2                : 58.6
> dwz_O2_xxhash         : 54.1 (92.4%)

So a speedup of ~10%. Nice.

> ---
>  Makefile  |   4 +-
>  dwz.c     | 103 +++++++++++++++++++++++-----------------
>  hashtab.c | 139 ------------------------------------------------------
>  hashtab.h |   6 ++-
>  4 files changed, 65 insertions(+), 187 deletions(-)

We probably also need a configure change to detect whether xxhash is
actually available.

> diff --git a/Makefile b/Makefile
> index 9394ef4..89546c2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>  DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>  CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>  CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>  override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
>  prefix = /usr
> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>  datarootdir = $(prefix)/share
>  mandir = $(datarootdir)/man
>  OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
> -LIBS=-lelf
> +LIBS=-lelf -lxxhash
>  dwz: $(OBJECTS)
>  	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>  args.o: native.o

So we use XXH_INLINE_ALL. Do we then still need to link with -lxxhash?

> diff --git a/dwz.c b/dwz.c
> index a3b289f..cc68f4a 100644
> --- a/dwz.c
> +++ b/dwz.c
> @@ -112,6 +112,21 @@
>  # define NT_GNU_BUILD_ID 3
>  #endif
> +/* xxHash state object.  */
> +static XXH64_state_t state;
> +
> +/* Clear xxHash state to zero.  */
> +#define hash_init_state() XXH64_reset(&state, 0)
> +
> +/* Update hash STATE with VALUE.  */
> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
> +
> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
> +
> +/* Get digest once we are done with a state.  */
> +#define hash_digest() XXH64_digest(&state)

Allocating and reusing the global state is convenient, but might make
parellizing dwz somewhat more complicated. Was this design used
deliberately, or can we easily switch to stack allocating the state?

Also I find the naming slightly confusing.  We used
iterative_hash_object to update a hash with an integral value, and
iterative_hash to update a hash with a size plus pointer. Now it seems
we swap the meaning. iterative_hash gets replace by
hash_update_state_object. And iterative_hash_object gets replaced by
hash_update_state. Was the meaning of "object" deliberately swapped?

Also it seems iterative_hash_object with inital zero value is replaced
by hash, which isn't defined here but in hashtab.h. Why the split in
definitions?

>  /* Print memory amount M (in kb) in both exact and human readable, like so:
>     1382508 (1.3G).  */
>  static void
> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>  {
>    unsigned int i;
> -  t->hash = iterative_hash_object (t->tag, 0);
> -  t->hash = iterative_hash_object (t->nattr, t->hash);
> -  t->hash = iterative_hash_object (t->children, t->hash);
> +  hash_init_state ();
> +  hash_update_state (t->tag);
> +  hash_update_state (t->nattr);
> +  hash_update_state (t->children);
>    for (i = 0; i < t->nattr; i++)
>      {
> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
> +      hash_update_state (t->attr[i].attr);
> +      hash_update_state (t->attr[i].form);
>        if (t->attr[i].form == DW_FORM_implicit_const)
> -	t->hash = iterative_hash_object (t->values[i], t->hash);
> +	hash_update_state (t->values[i]);
>      }
> +  t->hash = hash_digest ();
>  }

OK.

>  /* Maximum number of attributes in a DIE.  */
> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>  		  struct dw_file *cu_file = &cu->cu_files[value - 1];
>  		  size_t file_len = strlen (cu_file->file);
>  		  s = t->attr[i].attr;
> -		  die->u.p1.die_hash
> -		    = iterative_hash_object (s, die->u.p1.die_hash);
> -		  die->u.p1.die_hash
> -		    = iterative_hash_object (cu_file->time,
> -					     die->u.p1.die_hash);
> -		  die->u.p1.die_hash
> -		    = iterative_hash_object (cu_file->size,
> -					     die->u.p1.die_hash);
> -		  die->u.p1.die_hash
> -		    = iterative_hash (cu_file->file, file_len + 1,
> -				      die->u.p1.die_hash);
> +		  hash_init_state ();
> +		  hash_update_state (die->u.p1.die_hash);
> +		  hash_update_state (s);
> +		  hash_update_state (cu_file->time);
> +		  hash_update_state (cu_file->size);
> +		  hash_update_state_object (cu_file->file, file_len + 1);

OK.

>  		  if (cu_file->dir)
> -		    die->u.p1.die_hash
> -		      = iterative_hash (cu_file->dir,
> -					strlen (cu_file->dir) + 1,
> -					die->u.p1.die_hash);
> +		    {
> +		      hash_update_state_object (cu_file->dir,
> +						strlen (cu_file->dir) + 1);
> +		    }

OK, assuming the die->u.p1.die_hash below are alwasy done/correct.
But brackets aren't really needed.

>  		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>  		     etc. if immediately followed by DW_AT_*_line 0.  */
>  		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>  				  ? DW_AT_decl_line : DW_AT_call_line)
>  			   && t->attr[i + 1].form == DW_FORM_data1
>  			   && *new_ptr == 0)
> -		    break;
> +		    {
> +		      die->u.p1.die_hash = hash_digest ();
> +		      break;
> +		    }
> +
> +		  die->u.p1.die_hash = hash_digest ();

I find the logic hard to follow here. What makes sure the
die->u.p1.die_hash is always updated? Isn't it updated twice
(redundantly) in the above case? And needless brackets.

>  		  if (cu->cu_comp_dir
>  		      && (cu_file->dir ? cu_file->dir[0]
>  				       : cu_file->file[0]) != '/')
> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>  static hashval_t
>  strp_hash3 (const void *p)
>  {
> -  return iterative_hash (p, strlen (p), 0);
> +  return hash (p, strlen (p));
>  }
>  /* Corresponding equality function in strp_htab.  */

OK.

> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
>        p = debug_sections[DEBUG_STR].data + off;
>        len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>        if (alt_strp_htab)
>  	{
>  	  if (htab_find_with_hash (alt_strp_htab, p, hash))

OK.

> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
>        p = debug_sections[DEBUG_STR].data + off;
>        len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>        if (alt_strp_htab)
>  	{
>  	  unsigned char *q = (unsigned char *)

OK.

> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>  	{
>  	  p = debug_sections[DEBUG_STR].data + off;
>  	  len = strlen ((char *) p);
> -	  hash = iterative_hash (p, len, 0);
> +	  hash = hash (p, len);
>  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
>  	    return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>  	}

OK.

> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>      return DW_FORM_strp;
>    p = debug_sections[DEBUG_STR].data + off;
>    q = (unsigned char *) strchr ((char *) p, '\0');
> -  hash = iterative_hash (p, q - p, 0);
> +  hash = hash (p, q - p);
>    se.off = off;
>    se.new_off = hash & ~1U;
>    struct strp_entry *s = (struct strp_entry *)

OK.

> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
>  	memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>  	slot = htab_find_slot_with_hash (strp_htab, p,
> -					 iterative_hash (p, len - 1, 0),
> +					 hash (p, len - 1),
>  					 INSERT);
>  	if (slot == NULL)
>  	  dwz_oom ();

OK.

> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>  	      if (tail_offset_list != NULL)
>  		tail_offset_list[k++] = arr[j]->new_off;
>  	      slot = htab_find_slot_with_hash (strp_htab, q,
> -					       iterative_hash (q, l - 1, 0),
> +					       hash (q, l - 1),
>  					       INSERT);
>  	      if (slot == NULL)
>  		dwz_oom ();

OK.

> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>  {
>    void **slot;
>    struct line_entry le;
> -  hashval_t h;
> -
>    if (id == 0)
>      return 0;
>    assert (id <= cu->cu_nfiles);
>    le.file = &cu->cu_files[id - 1];
> -  h = iterative_hash_object (le.file->time, 0);
> -  h = iterative_hash_object (le.file->size, h);
> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
> +  hash_init_state ();
> +  hash_update_state (le.file->time);
> +  hash_update_state (le.file->size);
> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>    if (le.file->dir)
> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>    if (line_htab == NULL)
>      {
>        line_htab = htab_try_create (50, line_hash, line_eq, NULL);
> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>  	dwz_oom ();
>        max_line_id = 1;
>      }
> -  le.hash = h;
> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
> +  le.hash = hash_digest ();;
> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);

OK, but double ;;

>    if (slot == NULL)
>      dwz_oom ();
>    if (*slot == NULL)
>      {
>        struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>        l->file = le.file;
> -      l->hash = h;
> +      l->hash = le.hash;
>        l->new_id = max_line_id++;
>        *slot = (void *) l;
>        return l->new_id;

OK.

> @@ -10347,7 +10364,7 @@ handle_macro (void)
>  		  /* This should only happen if there were multiple
>  		     same transparent units within a single object file.  */
>  		  && htab_find_with_hash (strp_htab, p,
> -					  iterative_hash (p, len, 0)) == NULL)
> +					  hash (p, len)) == NULL)
>  		can_share = false;
>  	      s = ptr;
>  	      break;

OK.

> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>  	  hashval_t hash;
>  	  q = (unsigned char *) strchr ((char *) p, '\0');
> -	  hash = iterative_hash (p, q - p, 0);
> +	  hash = hash (p, q - p);
>  	  se.off = p - debug_sections[DEBUG_STR].data;
>  	  se.new_off = hash & ~1U;
>  	  slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);

OK.

> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>  	    {
>  	      q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>  	      slot = htab_find_slot_with_hash (strp_htab, p,
> -					       iterative_hash (p, q - p - 1,
> -							       0), INSERT);
> +					       hash (p, q - p - 1), INSERT);
>  	      if (slot == NULL)
>  		dwz_oom ();
>  	      assert (*slot == NULL);

OK.

> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>  		  p = debug_sections[DEBUG_STR].data + *pi;
>  		  q = (unsigned char *) strchr ((char *) p, '\0');
>  		  slot = htab_find_slot_with_hash (strp_htab, p,
> -						   iterative_hash (p, q - p,
> -								   0), INSERT);
> +						   hash (p, q - p), INSERT);
>  		  if (slot == NULL)
>  		    dwz_oom ();
>  		  assert (*slot == NULL);

OK.

> diff --git a/hashtab.c b/hashtab.c
> index 41eab30..a9b9d13 100644
> --- a/hashtab.c
> +++ b/hashtab.c
> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>    fclose (f);
>  }
>  #endif
> -
> -/* DERIVED FROM:
> ---------------------------------------------------------------------
> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> -hash(), hash2(), hash3, and mix() are externally useful functions.
> -Routines to test the hash are included if SELF_TEST is defined.
> -You can use this free for any purpose.  It has no warranty.
> ---------------------------------------------------------------------
> -*/
> -
> -/*
> ---------------------------------------------------------------------
> -mix -- mix 3 32-bit values reversibly.
> -For every delta with one or two bit set, and the deltas of all three
> -  high bits or all three low bits, whether the original value of a,b,c
> -  is almost all zero or is uniformly distributed,
> -* If mix() is run forward or backward, at least 32 bits in a,b,c
> -  have at least 1/4 probability of changing.
> -* If mix() is run forward, every bit of c will change between 1/3 and
> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
> -mix() was built out of 36 single-cycle latency instructions in a
> -  structure that could supported 2x parallelism, like so:
> -      a -= b;
> -      a -= c; x = (c>>13);
> -      b -= c; a ^= x;
> -      b -= a; x = (a<<8);
> -      c -= a; b ^= x;
> -      c -= b; x = (b>>13);
> -      ...
> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
> -  of that parallelism.  They've also turned some of those single-cycle
> -  latency instructions into multi-cycle latency instructions.  Still,
> -  this is the fastest good hash I could find.  There were about 2^^68
> -  to choose from.  I only looked at a billion or so.
> ---------------------------------------------------------------------
> -*/
> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
> -#define mix(a,b,c) \
> -{ \
> -  a -= b; a -= c; a ^= (c>>13); \
> -  b -= c; b -= a; b ^= (a<< 8); \
> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
> -}
> -
> -/*
> ---------------------------------------------------------------------
> -hash() -- hash a variable-length key into a 32-bit value
> -  k     : the key (the unaligned variable-length array of bytes)
> -  len   : the length of the key, counting by bytes
> -  level : can be any 4-byte value
> -Returns a 32-bit value.  Every bit of the key affects every bit of
> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
> -About 36+6len instructions.
> -
> -The best hash table sizes are powers of 2.  There is no need to do
> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
> -use a bitmask.  For example, if you need only 10 bits, do
> -  h = (h & hashmask(10));
> -In which case, the hash table should have hashsize(10) elements.
> -
> -If you are hashing n strings (ub1 **)k, do it like this:
> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
> -
> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
> -code any way you wish, private, educational, or commercial.  It's free.
> -
> -See http://burtleburtle.net/bob/hash/evahash.html
> -Use for hash table lookup, or anything where one collision in 2^32 is
> -acceptable.  Do NOT use for cryptographic purposes.
> ---------------------------------------------------------------------
> -*/
> -
> -hashval_t
> -iterative_hash (const void *k_in /* the key */,
> -                register size_t  length /* the length of the key */,
> -                register hashval_t initval /* the previous hash, or
> -                                              an arbitrary value */)
> -{
> -  register const unsigned char *k = (const unsigned char *)k_in;
> -  register hashval_t a,b,c,len;
> -
> -  /* Set up the internal state */
> -  len = length;
> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
> -  c = initval;           /* the previous hash value */
> -
> -  /*---------------------------------------- handle most of the key */
> -#ifndef WORDS_BIGENDIAN
> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
> -     by word for better speed.  This gives nondeterministic results on
> -     big-endian machines.  */
> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
> -    while (len >= 12)    /* aligned */
> -      {
> -	a += *(hashval_t *)(k+0);
> -	b += *(hashval_t *)(k+4);
> -	c += *(hashval_t *)(k+8);
> -	mix(a,b,c);
> -	k += 12; len -= 12;
> -      }
> -  else /* unaligned */
> -#endif
> -    while (len >= 12)
> -      {
> -	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
> -	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
> -	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
> -	mix(a,b,c);
> -	k += 12; len -= 12;
> -      }
> -
> -  /*------------------------------------- handle the last 11 bytes */
> -  c += length;
> -  switch(len)              /* all the case statements fall through */
> -    {
> -    case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
> -    case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
> -    case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
> -      /* the first byte of c is reserved for the length */
> -    case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
> -    case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
> -    case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
> -    case 5 : b+=k[4];			/* fall through */
> -    case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
> -    case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
> -    case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
> -    case 1 : a+=k[0];
> -      /* case 0: nothing left to add */
> -    }
> -  mix(a,b,c);
> -  /*-------------------------------------------- report the result */
> -  return c;
> -}
> diff --git a/hashtab.h b/hashtab.h
> index cb3da01..509e627 100644
> --- a/hashtab.h
> +++ b/hashtab.h
> @@ -153,9 +153,11 @@ extern void	htab_restore (htab_t, const char *, htab_restorefn);
>  #endif
> -/* An iterative hash function for arbitrary data.  */
> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
> +#include <xxhash.h>
> +
>  /* Shorthand for hashing something with an intrinsic size.  */
> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>  #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
>  #ifdef __cplusplus

See above for the question why everything hash related isn't defined
in the same place.

How much of hashtab.{c,h} are we still using?

Thanks,

Mark

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-06-25 19:44 ` Mark Wielaard
@ 2022-06-27 13:41   ` Martin Liška
  2022-06-30 16:13     ` Mark Wielaard
  0 siblings, 1 reply; 13+ messages in thread
From: Martin Liška @ 2022-06-27 13:41 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: dwz

[-- Attachment #1: Type: text/plain, Size: 20786 bytes --]

On 6/25/22 21:44, Mark Wielaard wrote:
> Hi Martin,

Hello.

> 
> Could you resent this patch with git send-email or rebase it against
> current master. As is it doesn't apply giving errors when trying to
> use it with git am.

Sure, rebased and attached to this email.

> 
> BTW. One of the reasons that I haven't reviewed things before is
> because I am getting one or more testsuite FAILs with newer Fedora
> versions. Which makes checking for regressions somewhat hard.
> 
> On Wed, Jan 05, 2022 at 09:17:37AM +0100, Martin Liška wrote:
>> The algorithm provides the following speed-up with -O2:
>>
>> - 1/5: sysdig (60M)
>> dwz_O2                : 9.7
>> dwz_O2_xxhash         : 8.5 (87.7%)
>> - 2/5: rtags (58M)
>> dwz_O2                : 17.6
>> dwz_O2_xxhash         : 15.8 (89.5%)
>> - 3/5: libetonyek (91M)
>> dwz_O2                : 10.8
>> dwz_O2_xxhash         : 9.4 (86.7%)
>> - 4/5: krita (685M)
>> dwz_O2                : 96.0
>> dwz_O2_xxhash         : 85.6 (89.1%)
>> - 5/5: gcc (1.2G)
>> dwz_O2                : 58.6
>> dwz_O2_xxhash         : 54.1 (92.4%)
> 
> So a speedup of ~10%. Nice.
> 
>> ---
>>  Makefile  |   4 +-
>>  dwz.c     | 103 +++++++++++++++++++++++-----------------
>>  hashtab.c | 139 ------------------------------------------------------
>>  hashtab.h |   6 ++-
>>  4 files changed, 65 insertions(+), 187 deletions(-)
> 
> We probably also need a configure change to detect whether xxhash is
> actually available.

With my patch, one will see a compilation error. Can you please help me with
the configure detection?

> 
>> diff --git a/Makefile b/Makefile
>> index 9394ef4..89546c2 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>>  DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>>  CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>>  CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
>> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
>> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>>  override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
>>  prefix = /usr
>> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>>  datarootdir = $(prefix)/share
>>  mandir = $(datarootdir)/man
>>  OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
>> -LIBS=-lelf
>> +LIBS=-lelf -lxxhash
>>  dwz: $(OBJECTS)
>>  	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>>  args.o: native.o
> 
> So we use XXH_INLINE_ALL. Do we then still need to link with -lxxhash?

Correct, it's not needed.

> 
>> diff --git a/dwz.c b/dwz.c
>> index a3b289f..cc68f4a 100644
>> --- a/dwz.c
>> +++ b/dwz.c
>> @@ -112,6 +112,21 @@
>>  # define NT_GNU_BUILD_ID 3
>>  #endif
>> +/* xxHash state object.  */
>> +static XXH64_state_t state;
>> +
>> +/* Clear xxHash state to zero.  */
>> +#define hash_init_state() XXH64_reset(&state, 0)
>> +
>> +/* Update hash STATE with VALUE.  */
>> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
>> +
>> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
>> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
>> +
>> +/* Get digest once we are done with a state.  */
>> +#define hash_digest() XXH64_digest(&state)
> 
> Allocating and reusing the global state is convenient, but might make
> parellizing dwz somewhat more complicated. Was this design used
> deliberately, or can we easily switch to stack allocating the state?

Yes, but the alternative approach would be making it a stack
variable in each function where we use it. Note right now, processes are used
for parallel execution.

> 
> Also I find the naming slightly confusing.  We used
> iterative_hash_object to update a hash with an integral value, and
> iterative_hash to update a hash with a size plus pointer. Now it seems
> we swap the meaning. iterative_hash gets replace by
> hash_update_state_object. And iterative_hash_object gets replaced by
> hash_update_state. Was the meaning of "object" deliberately swapped?

No, fixed that.

> 
> Also it seems iterative_hash_object with inital zero value is replaced
> by hash, which isn't defined here but in hashtab.h. Why the split in
> definitions?

All moved to dwz.c right now.

> 
>>  /* Print memory amount M (in kb) in both exact and human readable, like so:
>>     1382508 (1.3G).  */
>>  static void
>> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>>  {
>>    unsigned int i;
>> -  t->hash = iterative_hash_object (t->tag, 0);
>> -  t->hash = iterative_hash_object (t->nattr, t->hash);
>> -  t->hash = iterative_hash_object (t->children, t->hash);
>> +  hash_init_state ();
>> +  hash_update_state (t->tag);
>> +  hash_update_state (t->nattr);
>> +  hash_update_state (t->children);
>>    for (i = 0; i < t->nattr; i++)
>>      {
>> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
>> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
>> +      hash_update_state (t->attr[i].attr);
>> +      hash_update_state (t->attr[i].form);
>>        if (t->attr[i].form == DW_FORM_implicit_const)
>> -	t->hash = iterative_hash_object (t->values[i], t->hash);
>> +	hash_update_state (t->values[i]);
>>      }
>> +  t->hash = hash_digest ();
>>  }
> 
> OK.
> 
>>  /* Maximum number of attributes in a DIE.  */
>> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>  		  struct dw_file *cu_file = &cu->cu_files[value - 1];
>>  		  size_t file_len = strlen (cu_file->file);
>>  		  s = t->attr[i].attr;
>> -		  die->u.p1.die_hash
>> -		    = iterative_hash_object (s, die->u.p1.die_hash);
>> -		  die->u.p1.die_hash
>> -		    = iterative_hash_object (cu_file->time,
>> -					     die->u.p1.die_hash);
>> -		  die->u.p1.die_hash
>> -		    = iterative_hash_object (cu_file->size,
>> -					     die->u.p1.die_hash);
>> -		  die->u.p1.die_hash
>> -		    = iterative_hash (cu_file->file, file_len + 1,
>> -				      die->u.p1.die_hash);
>> +		  hash_init_state ();
>> +		  hash_update_state (die->u.p1.die_hash);
>> +		  hash_update_state (s);
>> +		  hash_update_state (cu_file->time);
>> +		  hash_update_state (cu_file->size);
>> +		  hash_update_state_object (cu_file->file, file_len + 1);
> 
> OK.
> 
>>  		  if (cu_file->dir)
>> -		    die->u.p1.die_hash
>> -		      = iterative_hash (cu_file->dir,
>> -					strlen (cu_file->dir) + 1,
>> -					die->u.p1.die_hash);
>> +		    {
>> +		      hash_update_state_object (cu_file->dir,
>> +						strlen (cu_file->dir) + 1);
>> +		    }
> 
> OK, assuming the die->u.p1.die_hash below are alwasy done/correct.
> But brackets aren't really needed.
> 
>>  		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>>  		     etc. if immediately followed by DW_AT_*_line 0.  */
>>  		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
>> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>>  				  ? DW_AT_decl_line : DW_AT_call_line)
>>  			   && t->attr[i + 1].form == DW_FORM_data1
>>  			   && *new_ptr == 0)
>> -		    break;
>> +		    {
>> +		      die->u.p1.die_hash = hash_digest ();
>> +		      break;
>> +		    }
>> +
>> +		  die->u.p1.die_hash = hash_digest ();
> 
> I find the logic hard to follow here. What makes sure the
> die->u.p1.die_hash is always updated? Isn't it updated twice
> (redundantly) in the above case? And needless brackets.

Yeah, it's hard because the 'break;' means the next call to:
die->u.p1.die_hash = hash_digest ();
is not executed. And die->u.p1.die_hash is used in the current version
as initial hash value (and it's modified in each iterative_hash_object call).

> 
>>  		  if (cu->cu_comp_dir
>>  		      && (cu_file->dir ? cu_file->dir[0]
>>  				       : cu_file->file[0]) != '/')
>> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>>  static hashval_t
>>  strp_hash3 (const void *p)
>>  {
>> -  return iterative_hash (p, strlen (p), 0);
>> +  return hash (p, strlen (p));
>>  }
>>  /* Corresponding equality function in strp_htab.  */
> 
> OK.
> 
>> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
>>        p = debug_sections[DEBUG_STR].data + off;
>>        len = strlen ((char *) p);
>> -      hash = iterative_hash (p, len, 0);
>> +      hash = hash (p, len);
>>        if (alt_strp_htab)
>>  	{
>>  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
> 
> OK.
> 
>> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
>>        p = debug_sections[DEBUG_STR].data + off;
>>        len = strlen ((char *) p);
>> -      hash = iterative_hash (p, len, 0);
>> +      hash = hash (p, len);
>>        if (alt_strp_htab)
>>  	{
>>  	  unsigned char *q = (unsigned char *)
> 
> OK.
> 
>> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>>  	{
>>  	  p = debug_sections[DEBUG_STR].data + off;
>>  	  len = strlen ((char *) p);
>> -	  hash = iterative_hash (p, len, 0);
>> +	  hash = hash (p, len);
>>  	  if (htab_find_with_hash (alt_strp_htab, p, hash))
>>  	    return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>>  	}
> 
> OK.
> 
>> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>>      return DW_FORM_strp;
>>    p = debug_sections[DEBUG_STR].data + off;
>>    q = (unsigned char *) strchr ((char *) p, '\0');
>> -  hash = iterative_hash (p, q - p, 0);
>> +  hash = hash (p, q - p);
>>    se.off = off;
>>    se.new_off = hash & ~1U;
>>    struct strp_entry *s = (struct strp_entry *)
> 
> OK.
> 
>> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
>>  	memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>>  	slot = htab_find_slot_with_hash (strp_htab, p,
>> -					 iterative_hash (p, len - 1, 0),
>> +					 hash (p, len - 1),
>>  					 INSERT);
>>  	if (slot == NULL)
>>  	  dwz_oom ();
> 
> OK.
> 
>> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>>  	      if (tail_offset_list != NULL)
>>  		tail_offset_list[k++] = arr[j]->new_off;
>>  	      slot = htab_find_slot_with_hash (strp_htab, q,
>> -					       iterative_hash (q, l - 1, 0),
>> +					       hash (q, l - 1),
>>  					       INSERT);
>>  	      if (slot == NULL)
>>  		dwz_oom ();
> 
> OK.
> 
>> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>  {
>>    void **slot;
>>    struct line_entry le;
>> -  hashval_t h;
>> -
>>    if (id == 0)
>>      return 0;
>>    assert (id <= cu->cu_nfiles);
>>    le.file = &cu->cu_files[id - 1];
>> -  h = iterative_hash_object (le.file->time, 0);
>> -  h = iterative_hash_object (le.file->size, h);
>> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
>> +  hash_init_state ();
>> +  hash_update_state (le.file->time);
>> +  hash_update_state (le.file->size);
>> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>>    if (le.file->dir)
>> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
>> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>>    if (line_htab == NULL)
>>      {
>>        line_htab = htab_try_create (50, line_hash, line_eq, NULL);
>> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>>  	dwz_oom ();
>>        max_line_id = 1;
>>      }
>> -  le.hash = h;
>> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
>> +  le.hash = hash_digest ();;
>> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
> 
> OK, but double ;;

Fixed.

> 
>>    if (slot == NULL)
>>      dwz_oom ();
>>    if (*slot == NULL)
>>      {
>>        struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>>        l->file = le.file;
>> -      l->hash = h;
>> +      l->hash = le.hash;
>>        l->new_id = max_line_id++;
>>        *slot = (void *) l;
>>        return l->new_id;
> 
> OK.
> 
>> @@ -10347,7 +10364,7 @@ handle_macro (void)
>>  		  /* This should only happen if there were multiple
>>  		     same transparent units within a single object file.  */
>>  		  && htab_find_with_hash (strp_htab, p,
>> -					  iterative_hash (p, len, 0)) == NULL)
>> +					  hash (p, len)) == NULL)
>>  		can_share = false;
>>  	      s = ptr;
>>  	      break;
> 
> OK.
> 
>> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>>  	  hashval_t hash;
>>  	  q = (unsigned char *) strchr ((char *) p, '\0');
>> -	  hash = iterative_hash (p, q - p, 0);
>> +	  hash = hash (p, q - p);
>>  	  se.off = p - debug_sections[DEBUG_STR].data;
>>  	  se.new_off = hash & ~1U;
>>  	  slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
> 
> OK.
> 
>> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>>  	    {
>>  	      q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>>  	      slot = htab_find_slot_with_hash (strp_htab, p,
>> -					       iterative_hash (p, q - p - 1,
>> -							       0), INSERT);
>> +					       hash (p, q - p - 1), INSERT);
>>  	      if (slot == NULL)
>>  		dwz_oom ();
>>  	      assert (*slot == NULL);
> 
> OK.
> 
>> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>>  		  p = debug_sections[DEBUG_STR].data + *pi;
>>  		  q = (unsigned char *) strchr ((char *) p, '\0');
>>  		  slot = htab_find_slot_with_hash (strp_htab, p,
>> -						   iterative_hash (p, q - p,
>> -								   0), INSERT);
>> +						   hash (p, q - p), INSERT);
>>  		  if (slot == NULL)
>>  		    dwz_oom ();
>>  		  assert (*slot == NULL);
> 
> OK.
> 
>> diff --git a/hashtab.c b/hashtab.c
>> index 41eab30..a9b9d13 100644
>> --- a/hashtab.c
>> +++ b/hashtab.c
>> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>>    fclose (f);
>>  }
>>  #endif
>> -
>> -/* DERIVED FROM:
>> ---------------------------------------------------------------------
>> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
>> -hash(), hash2(), hash3, and mix() are externally useful functions.
>> -Routines to test the hash are included if SELF_TEST is defined.
>> -You can use this free for any purpose.  It has no warranty.
>> ---------------------------------------------------------------------
>> -*/
>> -
>> -/*
>> ---------------------------------------------------------------------
>> -mix -- mix 3 32-bit values reversibly.
>> -For every delta with one or two bit set, and the deltas of all three
>> -  high bits or all three low bits, whether the original value of a,b,c
>> -  is almost all zero or is uniformly distributed,
>> -* If mix() is run forward or backward, at least 32 bits in a,b,c
>> -  have at least 1/4 probability of changing.
>> -* If mix() is run forward, every bit of c will change between 1/3 and
>> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
>> -mix() was built out of 36 single-cycle latency instructions in a
>> -  structure that could supported 2x parallelism, like so:
>> -      a -= b;
>> -      a -= c; x = (c>>13);
>> -      b -= c; a ^= x;
>> -      b -= a; x = (a<<8);
>> -      c -= a; b ^= x;
>> -      c -= b; x = (b>>13);
>> -      ...
>> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
>> -  of that parallelism.  They've also turned some of those single-cycle
>> -  latency instructions into multi-cycle latency instructions.  Still,
>> -  this is the fastest good hash I could find.  There were about 2^^68
>> -  to choose from.  I only looked at a billion or so.
>> ---------------------------------------------------------------------
>> -*/
>> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
>> -#define mix(a,b,c) \
>> -{ \
>> -  a -= b; a -= c; a ^= (c>>13); \
>> -  b -= c; b -= a; b ^= (a<< 8); \
>> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
>> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
>> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
>> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
>> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
>> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
>> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
>> -}
>> -
>> -/*
>> ---------------------------------------------------------------------
>> -hash() -- hash a variable-length key into a 32-bit value
>> -  k     : the key (the unaligned variable-length array of bytes)
>> -  len   : the length of the key, counting by bytes
>> -  level : can be any 4-byte value
>> -Returns a 32-bit value.  Every bit of the key affects every bit of
>> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
>> -About 36+6len instructions.
>> -
>> -The best hash table sizes are powers of 2.  There is no need to do
>> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
>> -use a bitmask.  For example, if you need only 10 bits, do
>> -  h = (h & hashmask(10));
>> -In which case, the hash table should have hashsize(10) elements.
>> -
>> -If you are hashing n strings (ub1 **)k, do it like this:
>> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
>> -
>> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
>> -code any way you wish, private, educational, or commercial.  It's free.
>> -
>> -See http://burtleburtle.net/bob/hash/evahash.html
>> -Use for hash table lookup, or anything where one collision in 2^32 is
>> -acceptable.  Do NOT use for cryptographic purposes.
>> ---------------------------------------------------------------------
>> -*/
>> -
>> -hashval_t
>> -iterative_hash (const void *k_in /* the key */,
>> -                register size_t  length /* the length of the key */,
>> -                register hashval_t initval /* the previous hash, or
>> -                                              an arbitrary value */)
>> -{
>> -  register const unsigned char *k = (const unsigned char *)k_in;
>> -  register hashval_t a,b,c,len;
>> -
>> -  /* Set up the internal state */
>> -  len = length;
>> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
>> -  c = initval;           /* the previous hash value */
>> -
>> -  /*---------------------------------------- handle most of the key */
>> -#ifndef WORDS_BIGENDIAN
>> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
>> -     by word for better speed.  This gives nondeterministic results on
>> -     big-endian machines.  */
>> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
>> -    while (len >= 12)    /* aligned */
>> -      {
>> -	a += *(hashval_t *)(k+0);
>> -	b += *(hashval_t *)(k+4);
>> -	c += *(hashval_t *)(k+8);
>> -	mix(a,b,c);
>> -	k += 12; len -= 12;
>> -      }
>> -  else /* unaligned */
>> -#endif
>> -    while (len >= 12)
>> -      {
>> -	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
>> -	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
>> -	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
>> -	mix(a,b,c);
>> -	k += 12; len -= 12;
>> -      }
>> -
>> -  /*------------------------------------- handle the last 11 bytes */
>> -  c += length;
>> -  switch(len)              /* all the case statements fall through */
>> -    {
>> -    case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
>> -    case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
>> -    case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
>> -      /* the first byte of c is reserved for the length */
>> -    case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
>> -    case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
>> -    case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
>> -    case 5 : b+=k[4];			/* fall through */
>> -    case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
>> -    case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
>> -    case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
>> -    case 1 : a+=k[0];
>> -      /* case 0: nothing left to add */
>> -    }
>> -  mix(a,b,c);
>> -  /*-------------------------------------------- report the result */
>> -  return c;
>> -}
>> diff --git a/hashtab.h b/hashtab.h
>> index cb3da01..509e627 100644
>> --- a/hashtab.h
>> +++ b/hashtab.h
>> @@ -153,9 +153,11 @@ extern void	htab_restore (htab_t, const char *, htab_restorefn);
>>  #endif
>> -/* An iterative hash function for arbitrary data.  */
>> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
>> +#include <xxhash.h>
>> +
>>  /* Shorthand for hashing something with an intrinsic size.  */
>> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
>> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>>  #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
>>  #ifdef __cplusplus
> 
> See above for the question why everything hash related isn't defined
> in the same place.

Done.

> 
> How much of hashtab.{c,h} are we still using?

wc -l hashtab.[ch]
  628 hashtab.c
  160 hashtab.h
  788 total

with my patch.

Please take a look at the updated patch.

Martin

> 
> Thanks,
> 
> Mark

[-- Attachment #2: 0001-Use-xxHash-hashing-algorithm.patch --]
[-- Type: text/x-patch, Size: 16859 bytes --]

From 51dabb571513bbf887c86e9ebf1b4e43dee6ebc3 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Wed, 22 Dec 2021 14:45:40 +0100
Subject: [PATCH] Use xxHash hashing algorithm.

The algorithm provides the following speed-up with -O2:

- 1/5: sysdig (60M)
dwz_O2                : 9.7
dwz_O2_xxhash         : 8.5 (87.7%)
- 2/5: rtags (58M)
dwz_O2                : 17.6
dwz_O2_xxhash         : 15.8 (89.5%)
- 3/5: libetonyek (91M)
dwz_O2                : 10.8
dwz_O2_xxhash         : 9.4 (86.7%)
- 4/5: krita (685M)
dwz_O2                : 96.0
dwz_O2_xxhash         : 85.6 (89.1%)
- 5/5: gcc (1.2G)
dwz_O2                : 58.6
dwz_O2_xxhash         : 54.1 (92.4%)
---
 Makefile  |   2 +-
 dwz.c     | 110 +++++++++++++++++++++++++-----------------
 hashtab.c | 139 ------------------------------------------------------
 hashtab.h |   5 --
 4 files changed, 67 insertions(+), 189 deletions(-)

diff --git a/Makefile b/Makefile
index 9394ef4..e9193f9 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ CFLAGS = -O2 -g
 DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
 CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
 CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
-CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
+CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
 override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
 
 prefix = /usr
diff --git a/dwz.c b/dwz.c
index a3b289f..8973a42 100644
--- a/dwz.c
+++ b/dwz.c
@@ -39,6 +39,8 @@
 #include <obstack.h>
 
 #include <gelf.h>
+#include <xxhash.h>
+
 #include "dwarf2.h"
 #include "hashtab.h"
 #include "sha1.h"
@@ -112,6 +114,26 @@
 # define NT_GNU_BUILD_ID 3
 #endif
 
+/* xxHash state object.  */
+static XXH64_state_t state;
+
+/* Clear xxHash state to zero.  */
+#define hash_init_state() XXH64_reset(&state, 0)
+
+/* Update hash STATE with VALUE.  */
+#define hash_update_state_object(value) XXH64_update(&state, &value, sizeof value)
+
+/* Update hash STATE with OBJECT that has a provided SIZE.  */
+#define hash_update_state(object, size) XXH64_update(&state, object, size)
+
+/* Get digest once we are done with a state.  */
+#define hash_digest() XXH64_digest(&state)
+
+/* Shorthand for hashing something with an intrinsic size.  */
+#define hash(IN,LEN) XXH64(IN, LEN, 0)
+#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
+#define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
+
 /* Print memory amount M (in kb) in both exact and human readable, like so:
    1382508 (1.3G).  */
 static void
@@ -1187,16 +1209,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
 {
   unsigned int i;
 
-  t->hash = iterative_hash_object (t->tag, 0);
-  t->hash = iterative_hash_object (t->nattr, t->hash);
-  t->hash = iterative_hash_object (t->children, t->hash);
+  hash_init_state ();
+  hash_update_state_object (t->tag);
+  hash_update_state_object (t->nattr);
+  hash_update_state_object (t->children);
   for (i = 0; i < t->nattr; i++)
     {
-      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
-      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
+      hash_update_state_object (t->attr[i].attr);
+      hash_update_state_object (t->attr[i].form);
       if (t->attr[i].form == DW_FORM_implicit_const)
-	t->hash = iterative_hash_object (t->values[i], t->hash);
+	hash_update_state_object (t->values[i]);
     }
+  t->hash = hash_digest ();
 }
 
 /* Maximum number of attributes in a DIE.  */
@@ -3402,22 +3426,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
 		  struct dw_file *cu_file = &cu->cu_files[value - 1];
 		  size_t file_len = strlen (cu_file->file);
 		  s = t->attr[i].attr;
-		  die->u.p1.die_hash
-		    = iterative_hash_object (s, die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->time,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash_object (cu_file->size,
-					     die->u.p1.die_hash);
-		  die->u.p1.die_hash
-		    = iterative_hash (cu_file->file, file_len + 1,
-				      die->u.p1.die_hash);
+		  hash_init_state ();
+		  hash_update_state_object (die->u.p1.die_hash);
+		  hash_update_state_object (s);
+		  hash_update_state_object (cu_file->time);
+		  hash_update_state_object (cu_file->size);
+		  hash_update_state (cu_file->file, file_len + 1);
 		  if (cu_file->dir)
-		    die->u.p1.die_hash
-		      = iterative_hash (cu_file->dir,
-					strlen (cu_file->dir) + 1,
-					die->u.p1.die_hash);
+		    {
+		      hash_update_state (cu_file->dir,
+						strlen (cu_file->dir) + 1);
+		    }
 		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
 		     etc. if immediately followed by DW_AT_*_line 0.  */
 		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
@@ -3427,7 +3446,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
 				  ? DW_AT_decl_line : DW_AT_call_line)
 			   && t->attr[i + 1].form == DW_FORM_data1
 			   && *new_ptr == 0)
-		    break;
+		    {
+		      die->u.p1.die_hash = hash_digest ();
+		      break;
+		    }
+
+		  die->u.p1.die_hash = hash_digest ();
+
 		  if (cu->cu_comp_dir
 		      && (cu_file->dir ? cu_file->dir[0]
 				       : cu_file->file[0]) != '/')
@@ -5541,7 +5566,7 @@ strp_eq2 (const void *p, const void *q)
 static hashval_t
 strp_hash3 (const void *p)
 {
-  return iterative_hash (p, strlen (p), 0);
+  return hash (p, strlen (p));
 }
 
 /* Corresponding equality function in strp_htab.  */
@@ -5571,7 +5596,7 @@ note_strp_offset (unsigned int off)
 
       p = debug_sections[DEBUG_STR].data + off;
       len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
       if (alt_strp_htab)
 	{
 	  if (htab_find_with_hash (alt_strp_htab, p, hash))
@@ -5653,7 +5678,7 @@ lookup_strp_offset (unsigned int off)
 
       p = debug_sections[DEBUG_STR].data + off;
       len = strlen ((char *) p);
-      hash = iterative_hash (p, len, 0);
+      hash = hash (p, len);
       if (alt_strp_htab)
 	{
 	  unsigned char *q = (unsigned char *)
@@ -5694,7 +5719,7 @@ note_strp_offset2 (unsigned int off)
 	{
 	  p = debug_sections[DEBUG_STR].data + off;
 	  len = strlen ((char *) p);
-	  hash = iterative_hash (p, len, 0);
+	  hash = hash (p, len);
 	  if (htab_find_with_hash (alt_strp_htab, p, hash))
 	    return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
 	}
@@ -5704,7 +5729,7 @@ note_strp_offset2 (unsigned int off)
     return DW_FORM_strp;
   p = debug_sections[DEBUG_STR].data + off;
   q = (unsigned char *) strchr ((char *) p, '\0');
-  hash = iterative_hash (p, q - p, 0);
+  hash = hash (p, q - p);
   se.off = off;
   se.new_off = hash & ~1U;
   struct strp_entry *s = (struct strp_entry *)
@@ -5860,7 +5885,7 @@ finalize_strp (bool build_tail_offset_list)
 
 	memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
 	slot = htab_find_slot_with_hash (strp_htab, p,
-					 iterative_hash (p, len - 1, 0),
+					 hash (p, len - 1),
 					 INSERT);
 	if (slot == NULL)
 	  dwz_oom ();
@@ -5876,7 +5901,7 @@ finalize_strp (bool build_tail_offset_list)
 	      if (tail_offset_list != NULL)
 		tail_offset_list[k++] = arr[j]->new_off;
 	      slot = htab_find_slot_with_hash (strp_htab, q,
-					       iterative_hash (q, l - 1, 0),
+					       hash (q, l - 1),
 					       INSERT);
 	      if (slot == NULL)
 		dwz_oom ();
@@ -9849,17 +9874,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
 {
   void **slot;
   struct line_entry le;
-  hashval_t h;
-
   if (id == 0)
     return 0;
   assert (id <= cu->cu_nfiles);
   le.file = &cu->cu_files[id - 1];
-  h = iterative_hash_object (le.file->time, 0);
-  h = iterative_hash_object (le.file->size, h);
-  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
+  hash_init_state ();
+  hash_update_state_object (le.file->time);
+  hash_update_state_object (le.file->size);
+  hash_update_state (le.file->file, strlen (le.file->file) + 1);
   if (le.file->dir)
-    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
+    hash_update_state (le.file->dir, strlen (le.file->dir) + 1);
   if (line_htab == NULL)
     {
       line_htab = htab_try_create (50, line_hash, line_eq, NULL);
@@ -9867,15 +9891,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
 	dwz_oom ();
       max_line_id = 1;
     }
-  le.hash = h;
-  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
+  le.hash = hash_digest ();
+  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
   if (slot == NULL)
     dwz_oom ();
   if (*slot == NULL)
     {
       struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
       l->file = le.file;
-      l->hash = h;
+      l->hash = le.hash;
       l->new_id = max_line_id++;
       *slot = (void *) l;
       return l->new_id;
@@ -10347,7 +10371,7 @@ handle_macro (void)
 		  /* This should only happen if there were multiple
 		     same transparent units within a single object file.  */
 		  && htab_find_with_hash (strp_htab, p,
-					  iterative_hash (p, len, 0)) == NULL)
+					  hash (p, len)) == NULL)
 		can_share = false;
 	      s = ptr;
 	      break;
@@ -15756,7 +15780,7 @@ optimize_multifile (unsigned int *die_count)
 	  hashval_t hash;
 
 	  q = (unsigned char *) strchr ((char *) p, '\0');
-	  hash = iterative_hash (p, q - p, 0);
+	  hash = hash (p, q - p);
 	  se.off = p - debug_sections[DEBUG_STR].data;
 	  se.new_off = hash & ~1U;
 	  slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
@@ -16076,8 +16100,7 @@ read_multifile (int fd, unsigned int die_count)
 	    {
 	      q = (unsigned char *) strchr ((char *) p, '\0') + 1;
 	      slot = htab_find_slot_with_hash (strp_htab, p,
-					       iterative_hash (p, q - p - 1,
-							       0), INSERT);
+					       hash (p, q - p - 1), INSERT);
 	      if (slot == NULL)
 		dwz_oom ();
 	      assert (*slot == NULL);
@@ -16090,8 +16113,7 @@ read_multifile (int fd, unsigned int die_count)
 		  p = debug_sections[DEBUG_STR].data + *pi;
 		  q = (unsigned char *) strchr ((char *) p, '\0');
 		  slot = htab_find_slot_with_hash (strp_htab, p,
-						   iterative_hash (p, q - p,
-								   0), INSERT);
+						   hash (p, q - p), INSERT);
 		  if (slot == NULL)
 		    dwz_oom ();
 		  assert (*slot == NULL);
diff --git a/hashtab.c b/hashtab.c
index 41eab30..a9b9d13 100644
--- a/hashtab.c
+++ b/hashtab.c
@@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
   fclose (f);
 }
 #endif
-
-/* DERIVED FROM:
---------------------------------------------------------------------
-lookup2.c, by Bob Jenkins, December 1996, Public Domain.
-hash(), hash2(), hash3, and mix() are externally useful functions.
-Routines to test the hash are included if SELF_TEST is defined.
-You can use this free for any purpose.  It has no warranty.
---------------------------------------------------------------------
-*/
-
-/*
---------------------------------------------------------------------
-mix -- mix 3 32-bit values reversibly.
-For every delta with one or two bit set, and the deltas of all three
-  high bits or all three low bits, whether the original value of a,b,c
-  is almost all zero or is uniformly distributed,
-* If mix() is run forward or backward, at least 32 bits in a,b,c
-  have at least 1/4 probability of changing.
-* If mix() is run forward, every bit of c will change between 1/3 and
-  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
-mix() was built out of 36 single-cycle latency instructions in a 
-  structure that could supported 2x parallelism, like so:
-      a -= b; 
-      a -= c; x = (c>>13);
-      b -= c; a ^= x;
-      b -= a; x = (a<<8);
-      c -= a; b ^= x;
-      c -= b; x = (b>>13);
-      ...
-  Unfortunately, superscalar Pentiums and Sparcs can't take advantage 
-  of that parallelism.  They've also turned some of those single-cycle
-  latency instructions into multi-cycle latency instructions.  Still,
-  this is the fastest good hash I could find.  There were about 2^^68
-  to choose from.  I only looked at a billion or so.
---------------------------------------------------------------------
-*/
-/* same, but slower, works on systems that might have 8 byte hashval_t's */
-#define mix(a,b,c) \
-{ \
-  a -= b; a -= c; a ^= (c>>13); \
-  b -= c; b -= a; b ^= (a<< 8); \
-  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
-  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
-  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
-  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
-  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
-  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
-}
-
-/*
---------------------------------------------------------------------
-hash() -- hash a variable-length key into a 32-bit value
-  k     : the key (the unaligned variable-length array of bytes)
-  len   : the length of the key, counting by bytes
-  level : can be any 4-byte value
-Returns a 32-bit value.  Every bit of the key affects every bit of
-the return value.  Every 1-bit and 2-bit delta achieves avalanche.
-About 36+6len instructions.
-
-The best hash table sizes are powers of 2.  There is no need to do
-mod a prime (mod is sooo slow!).  If you need less than 32 bits,
-use a bitmask.  For example, if you need only 10 bits, do
-  h = (h & hashmask(10));
-In which case, the hash table should have hashsize(10) elements.
-
-If you are hashing n strings (ub1 **)k, do it like this:
-  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
-
-By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
-code any way you wish, private, educational, or commercial.  It's free.
-
-See http://burtleburtle.net/bob/hash/evahash.html
-Use for hash table lookup, or anything where one collision in 2^32 is
-acceptable.  Do NOT use for cryptographic purposes.
---------------------------------------------------------------------
-*/
-
-hashval_t
-iterative_hash (const void *k_in /* the key */,
-                register size_t  length /* the length of the key */,
-                register hashval_t initval /* the previous hash, or
-                                              an arbitrary value */)
-{
-  register const unsigned char *k = (const unsigned char *)k_in;
-  register hashval_t a,b,c,len;
-
-  /* Set up the internal state */
-  len = length;
-  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
-  c = initval;           /* the previous hash value */
-
-  /*---------------------------------------- handle most of the key */
-#ifndef WORDS_BIGENDIAN
-  /* On a little-endian machine, if the data is 4-byte aligned we can hash
-     by word for better speed.  This gives nondeterministic results on
-     big-endian machines.  */
-  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
-    while (len >= 12)    /* aligned */
-      {
-	a += *(hashval_t *)(k+0);
-	b += *(hashval_t *)(k+4);
-	c += *(hashval_t *)(k+8);
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-  else /* unaligned */
-#endif
-    while (len >= 12)
-      {
-	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
-	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
-	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
-	mix(a,b,c);
-	k += 12; len -= 12;
-      }
-
-  /*------------------------------------- handle the last 11 bytes */
-  c += length;
-  switch(len)              /* all the case statements fall through */
-    {
-    case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
-    case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
-    case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
-      /* the first byte of c is reserved for the length */
-    case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
-    case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
-    case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
-    case 5 : b+=k[4];			/* fall through */
-    case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
-    case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
-    case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
-    case 1 : a+=k[0];
-      /* case 0: nothing left to add */
-    }
-  mix(a,b,c);
-  /*-------------------------------------------- report the result */
-  return c;
-}
diff --git a/hashtab.h b/hashtab.h
index cb3da01..5920abc 100644
--- a/hashtab.h
+++ b/hashtab.h
@@ -153,11 +153,6 @@ extern void	htab_restore (htab_t, const char *, htab_restorefn);
 
 #endif
 
-/* An iterative hash function for arbitrary data.  */
-extern hashval_t iterative_hash (const void *, size_t, hashval_t);
-/* Shorthand for hashing something with an intrinsic size.  */
-#define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
-
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
-- 
2.36.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-06-27 13:41   ` Martin Liška
@ 2022-06-30 16:13     ` Mark Wielaard
  2022-07-04 13:31       ` Martin Liška
  0 siblings, 1 reply; 13+ messages in thread
From: Mark Wielaard @ 2022-06-30 16:13 UTC (permalink / raw)
  To: Martin Liška; +Cc: dwz

Hi Martin,

On Mon, 2022-06-27 at 15:41 +0200, Martin Liška wrote:
> On 6/25/22 21:44, Mark Wielaard wrote:
> > Could you resent this patch with git send-email or rebase it
> > against
> > current master. As is it doesn't apply giving errors when trying to
> > use it with git am.
> 
> Sure, rebased and attached to this email.

Thanks. That applies cleanly. And (after installing xxhash-devel)
compiles and make checks nicely.

> > We probably also need a configure change to detect whether xxhash
> > is actually available.
> 
> With my patch, one will see a compilation error. Can you please help
> me with the configure detection?

Ha. I just looked at our configure file and it, uh, is almost a noop.
It doesn't no any checking, not even for having libelf. Lets make
updating configure a separate task.

> > > diff --git a/Makefile b/Makefile
> > > index 9394ef4..89546c2 100644
> > > --- a/Makefile
> > > +++ b/Makefile
> > > @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
> > >  DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
> > >  CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
> > >  CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
> > > -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
> > > +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
> > > -DXXH_INLINE_ALL=1
> > >  override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION)
> > > $(CFLAGS_COPYRIGHT)
> > >  prefix = /usr
> > > @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
> > >  datarootdir = $(prefix)/share
> > >  mandir = $(datarootdir)/man
> > >  OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
> > > -LIBS=-lelf
> > > +LIBS=-lelf -lxxhash
> > >  dwz: $(OBJECTS)
> > >  	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
> > >  args.o: native.o
> > 
> > So we use XXH_INLINE_ALL. Do we then still need to link with
> > -lxxhash?
> 
> Correct, it's not needed.

Good.

> > Allocating and reusing the global state is convenient, but might
> > make
> > parellizing dwz somewhat more complicated. Was this design used
> > deliberately, or can we easily switch to stack allocating the
> > state?
> 
> Yes, but the alternative approach would be making it a stack
> variable in each function where we use it. Note right now, processes
> are used for parallel execution.

OK, so nothing stands in the way of doing that if we do want to do in-
process parallelism.

> > Also I find the naming slightly confusing.  We used
> > iterative_hash_object to update a hash with an integral value, and
> > iterative_hash to update a hash with a size plus pointer. Now it
> > seems
> > we swap the meaning. iterative_hash gets replace by
> > hash_update_state_object. And iterative_hash_object gets replaced
> > by
> > hash_update_state. Was the meaning of "object" deliberately
> > swapped?
> 
> No, fixed that.

Thanks, that makes it a lot simpler imho.

> > Also it seems iterative_hash_object with inital zero value is
> > replaced
> > by hash, which isn't defined here but in hashtab.h. Why the split
> > in definitions?
> 
> All moved to dwz.c right now.

And added comments for everything. Thanks.

> > > +		  hash_init_state ();
> > > +		  hash_update_state (die->u.p1.die_hash);
> > > +		  hash_update_state (s);
> > > +		  hash_update_state (cu_file->time);
> > > +		  hash_update_state (cu_file->size);
> > > +		  hash_update_state_object (cu_file->file, file_len +
> > > 1);
> > 
> > OK.
> > 
> > >  		  if (cu_file->dir)
> > > -		    die->u.p1.die_hash
> > > -		      = iterative_hash (cu_file->dir,
> > > -					strlen (cu_file->dir) + 1,
> > > -					die->u.p1.die_hash);
> > > +		    {
> > > +		      hash_update_state_object (cu_file->dir,
> > > +						strlen (cu_file->dir) +
> > > 1);
> > > +		    }
> > 
> > OK, assuming the die->u.p1.die_hash below are alwasy done/correct.
> > But brackets aren't really needed.
> > 
> > >  		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
> > >  		     etc. if immediately followed by DW_AT_*_line
> > > 0.  */
> > >  		  else if (cu_file-
> > > >file_angle_brackets_encapsulated_no_slash
> > > @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu,
> > > dw_die_ref top_die, dw_die_ref die)
> > >  				  ? DW_AT_decl_line : DW_AT_call_line)
> > >  			   && t->attr[i + 1].form == DW_FORM_data1
> > >  			   && *new_ptr == 0)
> > > -		    break;
> > > +		    {
> > > +		      die->u.p1.die_hash = hash_digest ();
> > > +		      break;
> > > +		    }
> > > +
> > > +		  die->u.p1.die_hash = hash_digest ();
> > 
> > I find the logic hard to follow here. What makes sure the
> > die->u.p1.die_hash is always updated? Isn't it updated twice
> > (redundantly) in the above case? And needless brackets.
> 
> Yeah, it's hard because the 'break;' means the next call to:
> die->u.p1.die_hash = hash_digest ();
> is not executed. And die->u.p1.die_hash is used in the current version
> as initial hash value (and it's modified in each iterative_hash_object call).

Yeah, I see it is slightly easier to follow when not in patch form. It
does look correct.

> > > diff --git a/hashtab.h b/hashtab.h
> > > index cb3da01..509e627 100644
> > > --- a/hashtab.h
> > > +++ b/hashtab.h
> > > @@ -153,9 +153,11 @@ extern void	htab_restore (htab_t, const
> > > char *, htab_restorefn);
> > >  #endif
> > > -/* An iterative hash function for arbitrary data.  */
> > > -extern hashval_t iterative_hash (const void *, size_t,
> > > hashval_t);
> > > +#include <xxhash.h>
> > > +
> > >  /* Shorthand for hashing something with an intrinsic size.  */
> > > +#define hash(IN,LEN) XXH64(IN, LEN, 0)
> > > +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
> > >  #define iterative_hash_object(OB,INIT) iterative_hash (&OB,
> > > sizeof (OB), INIT)
> > >  #ifdef __cplusplus
> > 
> > See above for the question why everything hash related isn't
> > defined
> > in the same place.
> 
> Done.

OK. dwz.c is fine. But any reason why you picked that instead of
hashtab.h?

> > 
> > How much of hashtab.{c,h} are we still using?
> 
> wc -l hashtab.[ch]
>   628 hashtab.c
>   160 hashtab.h
>   788 total
> 
> with my patch.

Ah, right, all the actual hashtable code is still there of course. We
only replaced the hash, not the datastructure.

> Please take a look at the updated patch.

I read it again and it looks good to me.

Thanks,

Mark

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-06-30 16:13     ` Mark Wielaard
@ 2022-07-04 13:31       ` Martin Liška
  2022-07-04 15:40         ` Mark Wielaard
  0 siblings, 1 reply; 13+ messages in thread
From: Martin Liška @ 2022-07-04 13:31 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: dwz

> 
> OK. dwz.c is fine. But any reason why you picked that instead of
> hashtab.h?

Well, I wanted to have all macro definitions together to the definition
of a static variable:

+/* xxHash state object.  */
+static XXH64_state_t state;

> 
>>>
>>> How much of hashtab.{c,h} are we still using?
>>
>> wc -l hashtab.[ch]
>>   628 hashtab.c
>>   160 hashtab.h
>>   788 total
>>
>> with my patch.
> 
> Ah, right, all the actual hashtable code is still there of course. We
> only replaced the hash, not the datastructure.

Yep, the hash table related stuff is still needed.

> 
>> Please take a look at the updated patch.
> 
> I read it again and it looks good to me.

Fine. Can I install the patch now or should we wait for a follow up
patch that will come up with a proper configuration?

Cheers,
Martin

> 
> Thanks,
> 
> Mark


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-07-04 13:31       ` Martin Liška
@ 2022-07-04 15:40         ` Mark Wielaard
  2022-07-07 12:29           ` Martin Liška
  0 siblings, 1 reply; 13+ messages in thread
From: Mark Wielaard @ 2022-07-04 15:40 UTC (permalink / raw)
  To: Martin Liška; +Cc: dwz

Hi Martin,

On Mon, 2022-07-04 at 15:31 +0200, Martin Liška wrote:
> > Please take a look at the updated patch.
> > 
> > I read it again and it looks good to me.
> 
> Fine. Can I install the patch now or should we wait for a follow up
> patch that will come up with a proper configuration?

I think we can do the configure stuff later. We also should add a check
for libelf.

One last check. In theory dwz is arch independent, the result of
running dwz on a file should be the same whether it is done on a 32/64
bit or big/little-endian machine. To keep things reproducible the hash
should be the same across arches.

The xxhash documentation states: "hashes are identical across all
platforms (little / big endian)".

I assume the above holds, even on 32bit arches (we always use XXH64).

So unless you know that there are cases where the hash produces
different values on some arches then I'll say it is fine to install.

Thanks,

Mark

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Use xxHash hashing algorithm.
  2022-07-04 15:40         ` Mark Wielaard
@ 2022-07-07 12:29           ` Martin Liška
  2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
  0 siblings, 1 reply; 13+ messages in thread
From: Martin Liška @ 2022-07-07 12:29 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: dwz

On 7/4/22 17:40, Mark Wielaard wrote:
> So unless you know that there are cases where the hash produces
> different values on some arches then I'll say it is fine to install.

Hello.

I've just pushed the revision. Can you please install xxhash-devel (libxxhash-dev for Debian)
for all non-container builders?

https://builder.sourceware.org/buildbot/#/builders?tags=dwz

Thanks,
Martin

^ permalink raw reply	[flat|nested] 13+ messages in thread

* xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm)
  2022-07-07 12:29           ` Martin Liška
@ 2022-07-07 13:36             ` Mark Wielaard
  2022-07-07 15:04               ` xxhash devel on buildbot workers Thomas Fitzsimmons
  2022-07-11  7:48               ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Dan Horák
  0 siblings, 2 replies; 13+ messages in thread
From: Mark Wielaard @ 2022-07-07 13:36 UTC (permalink / raw)
  To: Martin Liška; +Cc: dwz, buildbot, Thomas Fitzsimmons, Dan Horák

Hi Martin, Dan, Tom,

On Thu, 2022-07-07 at 14:29 +0200, Martin Liška wrote:
> I've just pushed the revision. Can you please install xxhash-devel
> (libxxhash-dev for Debian)
> for all non-container builders?
> 
> https://builder.sourceware.org/buildbot/#/builders?tags=dwz

Installed on centos-x86_64, debian-arm64, debian-armhf and debian-i386.

Dan, could you install xxhash-devel on lfedora1 marist (s390x) and the
fit.vutbr.cz (ppc64le) server?

Tom, could you install libxxhash-dev on the debian-ppc64 server?

Thanks,

Mark

BTW. Note that on debian-oldstable (the arm64 and armhf boards)
xxhash.h tries to include xxhash.c with XXH_INLINE_ALL, which doesn't
work. We'll probably need a configure check or simply upgrade to a
newer version.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: xxhash devel on buildbot workers
  2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
@ 2022-07-07 15:04               ` Thomas Fitzsimmons
  2022-07-11  7:48               ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Dan Horák
  1 sibling, 0 replies; 13+ messages in thread
From: Thomas Fitzsimmons @ 2022-07-07 15:04 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Martin Liška, dwz, buildbot, Dan Horák

Hi Mark,

Mark Wielaard <mark@klomp.org> writes:

> On Thu, 2022-07-07 at 14:29 +0200, Martin Liška wrote:
>> I've just pushed the revision. Can you please install xxhash-devel
>> (libxxhash-dev for Debian)
>> for all non-container builders?
>> 
>> https://builder.sourceware.org/buildbot/#/builders?tags=dwz
>
> Installed on centos-x86_64, debian-arm64, debian-armhf and debian-i386.
>
> Dan, could you install xxhash-devel on lfedora1 marist (s390x) and the
> fit.vutbr.cz (ppc64le) server?
>
> Tom, could you install libxxhash-dev on the debian-ppc64 server

Done.

> BTW. Note that on debian-oldstable (the arm64 and armhf boards)
> xxhash.h tries to include xxhash.c with XXH_INLINE_ALL, which doesn't
> work. We'll probably need a configure check or simply upgrade to a
> newer version.

I installed libxxhash-dev:ppc64 0.8.1-1.  According to comments in
xxhash.h, this version is new enough that it does not host the
implementation inside xxhash.c.

Thomas

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm)
  2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
  2022-07-07 15:04               ` xxhash devel on buildbot workers Thomas Fitzsimmons
@ 2022-07-11  7:48               ` Dan Horák
  1 sibling, 0 replies; 13+ messages in thread
From: Dan Horák @ 2022-07-11  7:48 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: Martin Liška, dwz, buildbot, Thomas Fitzsimmons

Hi Mark,

On Thu, 07 Jul 2022 15:36:47 +0200
Mark Wielaard <mark@klomp.org> wrote:

> Hi Martin, Dan, Tom,
> 
> On Thu, 2022-07-07 at 14:29 +0200, Martin Liška wrote:
> > I've just pushed the revision. Can you please install xxhash-devel
> > (libxxhash-dev for Debian)
> > for all non-container builders?
> > 
> > https://builder.sourceware.org/buildbot/#/builders?tags=dwz
> 
> Installed on centos-x86_64, debian-arm64, debian-armhf and debian-i386.
> 
> Dan, could you install xxhash-devel on lfedora1 marist (s390x) and the
> fit.vutbr.cz (ppc64le) server?

done


		Dan
 
> Tom, could you install libxxhash-dev on the debian-ppc64 server?
> 
> Thanks,
> 
> Mark
> 
> BTW. Note that on debian-oldstable (the arm64 and armhf boards)
> xxhash.h tries to include xxhash.c with XXH_INLINE_ALL, which doesn't
> work. We'll probably need a configure check or simply upgrade to a
> newer version.
> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-07-11  7:48 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05  8:17 [PATCH] Use xxHash hashing algorithm Martin Liška
2022-03-31 11:29 ` Martin Liška
2022-04-28 10:06   ` Martin Liška
2022-06-20 13:29     ` Martin Liška
2022-06-25 19:44 ` Mark Wielaard
2022-06-27 13:41   ` Martin Liška
2022-06-30 16:13     ` Mark Wielaard
2022-07-04 13:31       ` Martin Liška
2022-07-04 15:40         ` Mark Wielaard
2022-07-07 12:29           ` Martin Liška
2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
2022-07-07 15:04               ` xxhash devel on buildbot workers Thomas Fitzsimmons
2022-07-11  7:48               ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Dan Horák

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).