public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] nss: Optimize nss_hash in nss_hash.c
@ 2022-05-23 16:23 Noah Goldstein
  0 siblings, 0 replies; only message in thread
From: Noah Goldstein @ 2022-05-23 16:23 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=3d155d4b6c29ddfd0b3318fa58dbf8ef20e7bca0

commit 3d155d4b6c29ddfd0b3318fa58dbf8ef20e7bca0
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date:   Thu May 19 17:18:02 2022 -0500

    nss: Optimize nss_hash in nss_hash.c
    
    The prior unrolling didn't really do much as it left the dependency
    chain between iterations. Unrolled the loop for 4 so 4x multiplies
    could be pipelined in out-of-order machines.
    
    Results for __nss_hash
    Benchmarked on Tigerlake: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
    
    Time as Geometric Mean of N=25 runs
    Geometric of all benchmark New / Old: 0.845
      type, length, New Time, Old Time, New Time / Old Time
     fixed,      0,    4.019,    3.729,               1.078
     fixed,      1,     4.95,    5.707,               0.867
     fixed,      2,    5.152,    5.657,               0.911
     fixed,      3,    4.641,    5.721,               0.811
     fixed,      4,    5.551,     5.81,               0.955
     fixed,      5,    6.525,    6.552,               0.996
     fixed,      6,    6.711,    6.561,               1.023
     fixed,      7,    6.715,    6.767,               0.992
     fixed,      8,    7.874,    7.915,               0.995
     fixed,      9,    8.888,    9.767,                0.91
     fixed,     10,    8.959,    9.762,               0.918
     fixed,     11,    9.188,    9.987,                0.92
     fixed,     12,    9.708,   10.618,               0.914
     fixed,     13,   10.393,    11.14,               0.933
     fixed,     14,   10.628,   12.097,               0.879
     fixed,     15,   10.982,   12.965,               0.847
     fixed,     16,   11.851,   14.429,               0.821
     fixed,     32,   24.334,   34.414,               0.707
     fixed,     64,   55.618,   86.688,               0.642
     fixed,    128,  118.261,   224.36,               0.527
     fixed,    256,  256.183,  538.629,               0.476
    random,      2,   11.194,   11.556,               0.969
    random,      4,   17.516,   17.205,               1.018
    random,      8,   23.501,   20.985,                1.12
    random,     16,   28.131,   29.212,               0.963
    random,     32,   35.436,   38.662,               0.917
    random,     64,    45.74,   58.868,               0.777
    random,    128,   75.394,  121.963,               0.618
    random,    256,  139.524,  260.726,               0.535
    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>

Diff:
---
 nss/nss_hash.c | 79 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/nss/nss_hash.c b/nss/nss_hash.c
index 3d8e4cf37e..1d3787e675 100644
--- a/nss/nss_hash.c
+++ b/nss/nss_hash.c
@@ -19,58 +19,63 @@
 
 /* This is from libc/db/hash/hash_func.c, hash3 is static there */
 /*
- * This is INCREDIBLY ugly, but fast.  We break the string up into 8 byte
+ * This is INCREDIBLY ugly, but fast.  We break the string up into 4 byte
  * units.  On the first time through the loop we get the "leftover bytes"
- * (strlen % 8).  On every other iteration, we perform 8 HASHC's so we handle
- * all 8 bytes.  Essentially, this saves us 7 cmp & branch instructions.  If
- * this routine is heavily used enough, it's worth the ugly coding.
+ * (len % 4).  On every other iteration, we perform a 4x unrolled version
+ * HASHC. Further unrolling does not appear to help.
  *
  * OZ's original sdbm hash
  */
 uint32_t
 __nss_hash (const void *keyarg, size_t len)
 {
+  enum
+  {
+    HASH_CONST_P0 = 1,	       /* (uint32_t)(65599 ^ 0).  */
+    HASH_CONST_P1 = 65599,     /* (uint32_t)(65599 ^ 1).  */
+    HASH_CONST_P2 = 8261505,   /* (uint32_t)(65599 ^ 2).  */
+    HASH_CONST_P3 = 780587199, /* (uint32_t)(65599 ^ 3).  */
+    HASH_CONST_P4 = 1139564289 /* (uint32_t)(65599 ^ 4).  */
+  };
+
   const unsigned char *key;
-  size_t loop;
   uint32_t h;
 
-#define HASHC   h = *key++ + 65599 * h
+#define HASHC	h = *key++ + HASH_CONST_P1 * h
 
   h = 0;
   key = keyarg;
   if (len > 0)
     {
-      loop = (len + 8 - 1) >> 3;
-      switch (len & (8 - 1))
-        {
-        case 0:
-          do
-            {
-              HASHC;
-              /* FALLTHROUGH */
-            case 7:
-              HASHC;
-              /* FALLTHROUGH */
-            case 6:
-              HASHC;
-              /* FALLTHROUGH */
-            case 5:
-              HASHC;
-              /* FALLTHROUGH */
-            case 4:
-              HASHC;
-              /* FALLTHROUGH */
-            case 3:
-              HASHC;
-              /* FALLTHROUGH */
-            case 2:
-              HASHC;
-              /* FALLTHROUGH */
-            case 1:
-              HASHC;
-            }
-	  while (--loop);
-        }
+      switch ((len & (4 - 1)))
+	{
+	case 0:
+	  /* h starts out as zero so no need to include the multiply. */
+	  h = *key++;
+	  /* FALLTHROUGH */
+	case 3:
+	  HASHC;
+	  /* FALLTHROUGH */
+	case 2:
+	  HASHC;
+	  /* FALLTHROUGH */
+	case 1:
+	  HASHC;
+	  /* FALLTHROUGH */
+	}
+
+      uint32_t c0, c1, c2, c3;
+      for (--len; len >= 4; len -= 4)
+	{
+	  c0 = (unsigned char) *(key + 0);
+	  c1 = (unsigned char) *(key + 1);
+	  c2 = (unsigned char) *(key + 2);
+	  c3 = (unsigned char) *(key + 3);
+	  h = HASH_CONST_P4 * h + HASH_CONST_P3 * c0 + HASH_CONST_P2 * c1
+	      + HASH_CONST_P1 * c2 + HASH_CONST_P0 * c3;
+
+	  key += 4;
+	}
     }
   return h;
 }


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-05-23 16:23 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-23 16:23 [glibc] nss: Optimize nss_hash in nss_hash.c Noah Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).