public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
From: "Martin Liška" <mliska@suse.cz>
To: dwz@sourceware.org
Subject: Re: [PATCH] Use xxHash hashing algorithm.
Date: Thu, 31 Mar 2022 13:29:45 +0200	[thread overview]
Message-ID: <f56a9101-9864-2f50-a090-7f12edd683b2@suse.cz> (raw)
In-Reply-To: <c8e771e5-2b17-02de-3afb-243cac1bd179@suse.cz>


PING^1

On 1/5/22 09:17, Martin Liška wrote:
> The algorithm provides the following speed-up with -O2:
> 
> - 1/5: sysdig (60M)
> dwz_O2                : 9.7
> dwz_O2_xxhash         : 8.5 (87.7%)
> - 2/5: rtags (58M)
> dwz_O2                : 17.6
> dwz_O2_xxhash         : 15.8 (89.5%)
> - 3/5: libetonyek (91M)
> dwz_O2                : 10.8
> dwz_O2_xxhash         : 9.4 (86.7%)
> - 4/5: krita (685M)
> dwz_O2                : 96.0
> dwz_O2_xxhash         : 85.6 (89.1%)
> - 5/5: gcc (1.2G)
> dwz_O2                : 58.6
> dwz_O2_xxhash         : 54.1 (92.4%)
> ---
>   Makefile  |   4 +-
>   dwz.c     | 103 +++++++++++++++++++++++-----------------
>   hashtab.c | 139 ------------------------------------------------------
>   hashtab.h |   6 ++-
>   4 files changed, 65 insertions(+), 187 deletions(-)
> 
> diff --git a/Makefile b/Makefile
> index 9394ef4..89546c2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -8,7 +8,7 @@ CFLAGS = -O2 -g
>   DWZ_VERSION := $(shell cat $(srcdir)/VERSION)
>   CFLAGS_VERSION = -DDWZ_VERSION='"$(DWZ_VERSION)"'
>   CFLAGS_COPYRIGHT = $(shell cat $(srcdir)/COPYRIGHT_YEARS)
> -CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64
> +CFLAGS_COMMON = -Wall -W -D_FILE_OFFSET_BITS=64 -DXXH_INLINE_ALL=1
>   override CFLAGS += $(CFLAGS_COMMON) $(CFLAGS_VERSION) $(CFLAGS_COPYRIGHT)
> 
>   prefix = /usr
> @@ -17,7 +17,7 @@ bindir = $(exec_prefix)/bin
>   datarootdir = $(prefix)/share
>   mandir = $(datarootdir)/man
>   OBJECTS = args.o dwz.o hashtab.o pool.o sha1.o dwarfnames.o
> -LIBS=-lelf
> +LIBS=-lelf -lxxhash
>   dwz: $(OBJECTS)
>       $(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
>   args.o: native.o
> diff --git a/dwz.c b/dwz.c
> index a3b289f..cc68f4a 100644
> --- a/dwz.c
> +++ b/dwz.c
> @@ -112,6 +112,21 @@
>   # define NT_GNU_BUILD_ID 3
>   #endif
> 
> +/* xxHash state object.  */
> +static XXH64_state_t state;
> +
> +/* Clear xxHash state to zero.  */
> +#define hash_init_state() XXH64_reset(&state, 0)
> +
> +/* Update hash STATE with VALUE.  */
> +#define hash_update_state(value) XXH64_update(&state, &value, sizeof value)
> +
> +/* Update hash STATE with OBJECT that has a provided SIZE.  */
> +#define hash_update_state_object(object, size) XXH64_update(&state, object, size)
> +
> +/* Get digest once we are done with a state.  */
> +#define hash_digest() XXH64_digest(&state)
> +
>   /* Print memory amount M (in kb) in both exact and human readable, like so:
>      1382508 (1.3G).  */
>   static void
> @@ -1187,16 +1202,18 @@ compute_abbrev_hash (struct abbrev_tag *t)
>   {
>     unsigned int i;
> 
> -  t->hash = iterative_hash_object (t->tag, 0);
> -  t->hash = iterative_hash_object (t->nattr, t->hash);
> -  t->hash = iterative_hash_object (t->children, t->hash);
> +  hash_init_state ();
> +  hash_update_state (t->tag);
> +  hash_update_state (t->nattr);
> +  hash_update_state (t->children);
>     for (i = 0; i < t->nattr; i++)
>       {
> -      t->hash = iterative_hash_object (t->attr[i].attr, t->hash);
> -      t->hash = iterative_hash_object (t->attr[i].form, t->hash);
> +      hash_update_state (t->attr[i].attr);
> +      hash_update_state (t->attr[i].form);
>         if (t->attr[i].form == DW_FORM_implicit_const)
> -    t->hash = iterative_hash_object (t->values[i], t->hash);
> +    hash_update_state (t->values[i]);
>       }
> +  t->hash = hash_digest ();
>   }
> 
>   /* Maximum number of attributes in a DIE.  */
> @@ -3402,22 +3419,17 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>             struct dw_file *cu_file = &cu->cu_files[value - 1];
>             size_t file_len = strlen (cu_file->file);
>             s = t->attr[i].attr;
> -          die->u.p1.die_hash
> -            = iterative_hash_object (s, die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash_object (cu_file->time,
> -                         die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash_object (cu_file->size,
> -                         die->u.p1.die_hash);
> -          die->u.p1.die_hash
> -            = iterative_hash (cu_file->file, file_len + 1,
> -                      die->u.p1.die_hash);
> +          hash_init_state ();
> +          hash_update_state (die->u.p1.die_hash);
> +          hash_update_state (s);
> +          hash_update_state (cu_file->time);
> +          hash_update_state (cu_file->size);
> +          hash_update_state_object (cu_file->file, file_len + 1);
>             if (cu_file->dir)
> -            die->u.p1.die_hash
> -              = iterative_hash (cu_file->dir,
> -                    strlen (cu_file->dir) + 1,
> -                    die->u.p1.die_hash);
> +            {
> +              hash_update_state_object (cu_file->dir,
> +                        strlen (cu_file->dir) + 1);
> +            }
>             /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
>                etc. if immediately followed by DW_AT_*_line 0.  */
>             else if (cu_file->file_angle_brackets_encapsulated_no_slash
> @@ -3427,7 +3439,13 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
>                     ? DW_AT_decl_line : DW_AT_call_line)
>                  && t->attr[i + 1].form == DW_FORM_data1
>                  && *new_ptr == 0)
> -            break;
> +            {
> +              die->u.p1.die_hash = hash_digest ();
> +              break;
> +            }
> +
> +          die->u.p1.die_hash = hash_digest ();
> +
>             if (cu->cu_comp_dir
>                 && (cu_file->dir ? cu_file->dir[0]
>                          : cu_file->file[0]) != '/')
> @@ -5541,7 +5559,7 @@ strp_eq2 (const void *p, const void *q)
>   static hashval_t
>   strp_hash3 (const void *p)
>   {
> -  return iterative_hash (p, strlen (p), 0);
> +  return hash (p, strlen (p));
>   }
> 
>   /* Corresponding equality function in strp_htab.  */
> @@ -5571,7 +5589,7 @@ note_strp_offset (unsigned int off)
> 
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (alt_strp_htab)
>       {
>         if (htab_find_with_hash (alt_strp_htab, p, hash))
> @@ -5653,7 +5671,7 @@ lookup_strp_offset (unsigned int off)
> 
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (alt_strp_htab)
>       {
>         unsigned char *q = (unsigned char *)
> @@ -5694,7 +5712,7 @@ note_strp_offset2 (unsigned int off)
>       {
>         p = debug_sections[DEBUG_STR].data + off;
>         len = strlen ((char *) p);
> -      hash = iterative_hash (p, len, 0);
> +      hash = hash (p, len);
>         if (htab_find_with_hash (alt_strp_htab, p, hash))
>           return dwarf_5 ? DW_FORM_strp_sup : DW_FORM_GNU_strp_alt;
>       }
> @@ -5704,7 +5722,7 @@ note_strp_offset2 (unsigned int off)
>       return DW_FORM_strp;
>     p = debug_sections[DEBUG_STR].data + off;
>     q = (unsigned char *) strchr ((char *) p, '\0');
> -  hash = iterative_hash (p, q - p, 0);
> +  hash = hash (p, q - p);
>     se.off = off;
>     se.new_off = hash & ~1U;
>     struct strp_entry *s = (struct strp_entry *)
> @@ -5860,7 +5878,7 @@ finalize_strp (bool build_tail_offset_list)
> 
>       memcpy (p, debug_sections[DEBUG_STR].data + arr[i]->off, len);
>       slot = htab_find_slot_with_hash (strp_htab, p,
> -                     iterative_hash (p, len - 1, 0),
> +                     hash (p, len - 1),
>                        INSERT);
>       if (slot == NULL)
>         dwz_oom ();
> @@ -5876,7 +5894,7 @@ finalize_strp (bool build_tail_offset_list)
>             if (tail_offset_list != NULL)
>           tail_offset_list[k++] = arr[j]->new_off;
>             slot = htab_find_slot_with_hash (strp_htab, q,
> -                           iterative_hash (q, l - 1, 0),
> +                           hash (q, l - 1),
>                              INSERT);
>             if (slot == NULL)
>           dwz_oom ();
> @@ -9849,17 +9867,16 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>   {
>     void **slot;
>     struct line_entry le;
> -  hashval_t h;
> -
>     if (id == 0)
>       return 0;
>     assert (id <= cu->cu_nfiles);
>     le.file = &cu->cu_files[id - 1];
> -  h = iterative_hash_object (le.file->time, 0);
> -  h = iterative_hash_object (le.file->size, h);
> -  h = iterative_hash (le.file->file, strlen (le.file->file) + 1, h);
> +  hash_init_state ();
> +  hash_update_state (le.file->time);
> +  hash_update_state (le.file->size);
> +  hash_update_state_object (le.file->file, strlen (le.file->file) + 1);
>     if (le.file->dir)
> -    h = iterative_hash (le.file->dir, strlen (le.file->dir) + 1, h);
> +    hash_update_state_object (le.file->dir, strlen (le.file->dir) + 1);
>     if (line_htab == NULL)
>       {
>         line_htab = htab_try_create (50, line_hash, line_eq, NULL);
> @@ -9867,15 +9884,15 @@ line_htab_lookup (dw_cu_ref cu, unsigned int id)
>       dwz_oom ();
>         max_line_id = 1;
>       }
> -  le.hash = h;
> -  slot = htab_find_slot_with_hash (line_htab, &le, h, INSERT);
> +  le.hash = hash_digest ();;
> +  slot = htab_find_slot_with_hash (line_htab, &le, le.hash, INSERT);
>     if (slot == NULL)
>       dwz_oom ();
>     if (*slot == NULL)
>       {
>         struct line_entry *l = pool_alloc (line_entry, sizeof (*l));
>         l->file = le.file;
> -      l->hash = h;
> +      l->hash = le.hash;
>         l->new_id = max_line_id++;
>         *slot = (void *) l;
>         return l->new_id;
> @@ -10347,7 +10364,7 @@ handle_macro (void)
>             /* This should only happen if there were multiple
>                same transparent units within a single object file.  */
>             && htab_find_with_hash (strp_htab, p,
> -                      iterative_hash (p, len, 0)) == NULL)
> +                      hash (p, len)) == NULL)
>           can_share = false;
>             s = ptr;
>             break;
> @@ -15756,7 +15773,7 @@ optimize_multifile (unsigned int *die_count)
>         hashval_t hash;
> 
>         q = (unsigned char *) strchr ((char *) p, '\0');
> -      hash = iterative_hash (p, q - p, 0);
> +      hash = hash (p, q - p);
>         se.off = p - debug_sections[DEBUG_STR].data;
>         se.new_off = hash & ~1U;
>         slot = htab_find_slot_with_hash (strp_htab, &se, se.new_off, INSERT);
> @@ -16076,8 +16093,7 @@ read_multifile (int fd, unsigned int die_count)
>           {
>             q = (unsigned char *) strchr ((char *) p, '\0') + 1;
>             slot = htab_find_slot_with_hash (strp_htab, p,
> -                           iterative_hash (p, q - p - 1,
> -                                   0), INSERT);
> +                           hash (p, q - p - 1), INSERT);
>             if (slot == NULL)
>           dwz_oom ();
>             assert (*slot == NULL);
> @@ -16090,8 +16106,7 @@ read_multifile (int fd, unsigned int die_count)
>             p = debug_sections[DEBUG_STR].data + *pi;
>             q = (unsigned char *) strchr ((char *) p, '\0');
>             slot = htab_find_slot_with_hash (strp_htab, p,
> -                           iterative_hash (p, q - p,
> -                                   0), INSERT);
> +                           hash (p, q - p), INSERT);
>             if (slot == NULL)
>               dwz_oom ();
>             assert (*slot == NULL);
> diff --git a/hashtab.c b/hashtab.c
> index 41eab30..a9b9d13 100644
> --- a/hashtab.c
> +++ b/hashtab.c
> @@ -626,142 +626,3 @@ htab_restore (htab, name, restorefn)
>     fclose (f);
>   }
>   #endif
> -
> -/* DERIVED FROM:
> ---------------------------------------------------------------------
> -lookup2.c, by Bob Jenkins, December 1996, Public Domain.
> -hash(), hash2(), hash3, and mix() are externally useful functions.
> -Routines to test the hash are included if SELF_TEST is defined.
> -You can use this free for any purpose.  It has no warranty.
> ---------------------------------------------------------------------
> -*/
> -
> -/*
> ---------------------------------------------------------------------
> -mix -- mix 3 32-bit values reversibly.
> -For every delta with one or two bit set, and the deltas of all three
> -  high bits or all three low bits, whether the original value of a,b,c
> -  is almost all zero or is uniformly distributed,
> -* If mix() is run forward or backward, at least 32 bits in a,b,c
> -  have at least 1/4 probability of changing.
> -* If mix() is run forward, every bit of c will change between 1/3 and
> -  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
> -mix() was built out of 36 single-cycle latency instructions in a
> -  structure that could supported 2x parallelism, like so:
> -      a -= b;
> -      a -= c; x = (c>>13);
> -      b -= c; a ^= x;
> -      b -= a; x = (a<<8);
> -      c -= a; b ^= x;
> -      c -= b; x = (b>>13);
> -      ...
> -  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
> -  of that parallelism.  They've also turned some of those single-cycle
> -  latency instructions into multi-cycle latency instructions.  Still,
> -  this is the fastest good hash I could find.  There were about 2^^68
> -  to choose from.  I only looked at a billion or so.
> ---------------------------------------------------------------------
> -*/
> -/* same, but slower, works on systems that might have 8 byte hashval_t's */
> -#define mix(a,b,c) \
> -{ \
> -  a -= b; a -= c; a ^= (c>>13); \
> -  b -= c; b -= a; b ^= (a<< 8); \
> -  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
> -  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
> -  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
> -  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
> -  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
> -  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
> -}
> -
> -/*
> ---------------------------------------------------------------------
> -hash() -- hash a variable-length key into a 32-bit value
> -  k     : the key (the unaligned variable-length array of bytes)
> -  len   : the length of the key, counting by bytes
> -  level : can be any 4-byte value
> -Returns a 32-bit value.  Every bit of the key affects every bit of
> -the return value.  Every 1-bit and 2-bit delta achieves avalanche.
> -About 36+6len instructions.
> -
> -The best hash table sizes are powers of 2.  There is no need to do
> -mod a prime (mod is sooo slow!).  If you need less than 32 bits,
> -use a bitmask.  For example, if you need only 10 bits, do
> -  h = (h & hashmask(10));
> -In which case, the hash table should have hashsize(10) elements.
> -
> -If you are hashing n strings (ub1 **)k, do it like this:
> -  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
> -
> -By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
> -code any way you wish, private, educational, or commercial.  It's free.
> -
> -See http://burtleburtle.net/bob/hash/evahash.html
> -Use for hash table lookup, or anything where one collision in 2^32 is
> -acceptable.  Do NOT use for cryptographic purposes.
> ---------------------------------------------------------------------
> -*/
> -
> -hashval_t
> -iterative_hash (const void *k_in /* the key */,
> -                register size_t  length /* the length of the key */,
> -                register hashval_t initval /* the previous hash, or
> -                                              an arbitrary value */)
> -{
> -  register const unsigned char *k = (const unsigned char *)k_in;
> -  register hashval_t a,b,c,len;
> -
> -  /* Set up the internal state */
> -  len = length;
> -  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
> -  c = initval;           /* the previous hash value */
> -
> -  /*---------------------------------------- handle most of the key */
> -#ifndef WORDS_BIGENDIAN
> -  /* On a little-endian machine, if the data is 4-byte aligned we can hash
> -     by word for better speed.  This gives nondeterministic results on
> -     big-endian machines.  */
> -  if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
> -    while (len >= 12)    /* aligned */
> -      {
> -    a += *(hashval_t *)(k+0);
> -    b += *(hashval_t *)(k+4);
> -    c += *(hashval_t *)(k+8);
> -    mix(a,b,c);
> -    k += 12; len -= 12;
> -      }
> -  else /* unaligned */
> -#endif
> -    while (len >= 12)
> -      {
> -    a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
> -    b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
> -    c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
> -    mix(a,b,c);
> -    k += 12; len -= 12;
> -      }
> -
> -  /*------------------------------------- handle the last 11 bytes */
> -  c += length;
> -  switch(len)              /* all the case statements fall through */
> -    {
> -    case 11: c+=((hashval_t)k[10]<<24);    /* fall through */
> -    case 10: c+=((hashval_t)k[9]<<16);    /* fall through */
> -    case 9 : c+=((hashval_t)k[8]<<8);    /* fall through */
> -      /* the first byte of c is reserved for the length */
> -    case 8 : b+=((hashval_t)k[7]<<24);    /* fall through */
> -    case 7 : b+=((hashval_t)k[6]<<16);    /* fall through */
> -    case 6 : b+=((hashval_t)k[5]<<8);    /* fall through */
> -    case 5 : b+=k[4];            /* fall through */
> -    case 4 : a+=((hashval_t)k[3]<<24);    /* fall through */
> -    case 3 : a+=((hashval_t)k[2]<<16);    /* fall through */
> -    case 2 : a+=((hashval_t)k[1]<<8);    /* fall through */
> -    case 1 : a+=k[0];
> -      /* case 0: nothing left to add */
> -    }
> -  mix(a,b,c);
> -  /*-------------------------------------------- report the result */
> -  return c;
> -}
> diff --git a/hashtab.h b/hashtab.h
> index cb3da01..509e627 100644
> --- a/hashtab.h
> +++ b/hashtab.h
> @@ -153,9 +153,11 @@ extern void    htab_restore (htab_t, const char *, htab_restorefn);
> 
>   #endif
> 
> -/* An iterative hash function for arbitrary data.  */
> -extern hashval_t iterative_hash (const void *, size_t, hashval_t);
> +#include <xxhash.h>
> +
>   /* Shorthand for hashing something with an intrinsic size.  */
> +#define hash(IN,LEN) XXH64(IN, LEN, 0)
> +#define iterative_hash(IN,LEN,INIT) XXH64(IN, LEN, INIT)
>   #define iterative_hash_object(OB,INIT) iterative_hash (&OB, sizeof (OB), INIT)
> 
>   #ifdef __cplusplus


  reply	other threads:[~2022-03-31 11:29 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-05  8:17 Martin Liška
2022-03-31 11:29 ` Martin Liška [this message]
2022-04-28 10:06   ` Martin Liška
2022-06-20 13:29     ` Martin Liška
2022-06-25 19:44 ` Mark Wielaard
2022-06-27 13:41   ` Martin Liška
2022-06-30 16:13     ` Mark Wielaard
2022-07-04 13:31       ` Martin Liška
2022-07-04 15:40         ` Mark Wielaard
2022-07-07 12:29           ` Martin Liška
2022-07-07 13:36             ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Mark Wielaard
2022-07-07 15:04               ` xxhash devel on buildbot workers Thomas Fitzsimmons
2022-07-11  7:48               ` xxhash devel on buildbot workers (Was: [PATCH] Use xxHash hashing algorithm) Dan Horák

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f56a9101-9864-2f50-a090-7f12edd683b2@suse.cz \
    --to=mliska@suse.cz \
    --cc=dwz@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).