public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [Confirmed for long needles] Fastest String Search Algorithm.
@ 2021-06-15 20:22 Amit Choudhary
  0 siblings, 0 replies; only message in thread
From: Amit Choudhary @ 2021-06-15 20:22 UTC (permalink / raw)
  To: GNU C Library

[-- Attachment #1: Type: text/plain, Size: 2543 bytes --]

Hi,

In case someone is interested, else please ignore (just sending on
public domain for other people if they do web search) [Tested on
latest glibc - glibc-2.33].

I have done final testing with long needles. I modified bench-strstr.c
and included my algorithm in it.

Attached are the diff, analysis.xlsx, and original bench-strstr.out.

Below is my code:

==============================================================================================

// Choudhary algorithm
static char *
chal(char *text, char *pattern)
{

#define false 0
#define true 1
#define ALPHABET_SIZE 256

    long i = 0;
    long index = 0;
    long end_index = 0;
    int not_found = false;

    long text_len = strlen(text);
    long pattern_len = strlen(pattern);

    long pi_44 = pattern_len - 1;
    long pi_34 = (3 * pattern_len) / 4;
    long pi_24 = pattern_len / 2;
    long pi_14 = pattern_len / 4;

    long fps = 0;
    long skip_table[ALPHABET_SIZE] = {0};

    // preprocess pattern and fill skip_table
    for (i = 0; i < pattern_len; i++) {
        skip_table[(int)(pattern[i])] = pattern_len - 1 - i;
    }

    // now search
    for (i = 0; i < text_len; i++) {

        if ((text_len - i) < pattern_len) {
                //printf("\tfps = %ld\n", fps);
            return NULL;
            //return -1;
        }

        if (pattern[pi_44] != text[i + pi_44]) {
            if (skip_table[(int)(text[i + pi_44])] > 0) {
                i = i + skip_table[(int)(text[i + pi_44])] - 1;
            }
            continue;
        }

        if (pattern[0] != text[i]) {
            continue;
        }

        if (pattern[pi_24] != text[i + pi_24]) {
            continue;
        }

        if (pattern[pi_34] != text[i + pi_34]) {
            continue;
        }

        if (pattern[pi_14] != text[i + pi_14]) {
            continue;
        }

        fps = fps + 1;
        end_index = i + pi_44;
        not_found = false;

        for (index = i; index <= end_index; index++) {
            if (text[index] != pattern[index - i]) {
                not_found = true;
                break;
            }
        } // end of inner for loop

        if (not_found == false) { // match is found
            //printf("\tfps = %ld\n", fps);
            return (text + i);
            //return i;
        }

    } // end of outer for loop

                //printf("\tfps = %ld\n", fps);
    return NULL;
    //return -1;

} // end of chal

==============================================================================================

Amit

[-- Attachment #2: analysis.xlsx --]
[-- Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, Size: 8878 bytes --]

[-- Attachment #3: glibc-2.33.diff --]
[-- Type: text/x-patch, Size: 3991 bytes --]

diff -ru glibc-2.33.orig/benchtests/bench-strstr.c glibc-2.33/benchtests/bench-strstr.c
--- glibc-2.33.orig/benchtests/bench-strstr.c	2021-02-01 22:45:33.000000000 +0530
+++ glibc-2.33/benchtests/bench-strstr.c	2021-06-16 01:22:34.320912352 +0530
@@ -45,6 +45,8 @@
 "library functions, and _where_ in this manual you can find more specific "
 "information about them.\n";
 
+static char *chal(char *text, char *pattern);
+
 /* Simple yet efficient strstr - for needles < 32 bytes it is 2-4 times
    faster than the optimized twoway_strstr.  */
 static char *
@@ -125,6 +127,7 @@
 typedef char *(*proto_t) (const char *, const char *);
 
 IMPL (strstr, 1)
+IMPL (chal, 0)
 IMPL (twoway_strstr, 0)
 IMPL (basic_strstr, 0)
 
@@ -210,6 +213,7 @@
    increasing needle sizes.  The slowest cases of the two implementations are
    within a factor of 2 on several different microarchitectures.  */
 
+#if 0
 static void
 test_hard_needle (size_t ne_len, size_t hs_len)
 {
@@ -278,6 +282,94 @@
     putchar ('\n');
   }
 }
+#endif
+
+// Choudhary algorithm
+static char *
+chal(char *text, char *pattern)
+{
+
+#define false 0
+#define true 1
+#define ALPHABET_SIZE 256
+
+    long i = 0;
+    long index = 0;
+    long end_index = 0;
+    int not_found = false;
+
+    long text_len = strlen(text);
+    long pattern_len = strlen(pattern);
+
+    long pi_44 = pattern_len - 1;
+    long pi_34 = (3 * pattern_len) / 4;
+    long pi_24 = pattern_len / 2;
+    long pi_14 = pattern_len / 4;
+
+    long fps = 0;
+    long skip_table[ALPHABET_SIZE] = {0};
+
+    // preprocess pattern and fill skip_table
+    for (i = 0; i < pattern_len; i++) {
+        skip_table[(int)(pattern[i])] = pattern_len - 1 - i;
+    }
+
+    // now search
+    for (i = 0; i < text_len; i++) {
+
+        if ((text_len - i) < pattern_len) {
+                //printf("\tfps = %ld\n", fps);
+            return NULL;
+            //return -1;
+        }
+
+        if (pattern[pi_44] != text[i + pi_44]) {
+            if (skip_table[(int)(text[i + pi_44])] > 0) {
+                i = i + skip_table[(int)(text[i + pi_44])] - 1;
+            }
+            continue;
+        }
+
+        if (pattern[0] != text[i]) {
+            continue;
+        }
+
+        if (pattern[pi_24] != text[i + pi_24]) {
+            continue;
+        }
+
+        if (pattern[pi_34] != text[i + pi_34]) {
+            continue;
+        }
+
+        if (pattern[pi_14] != text[i + pi_14]) {
+            continue;
+        }
+
+        fps = fps + 1;
+        end_index = i + pi_44;
+        not_found = false;
+
+        for (index = i; index <= end_index; index++) {
+            if (text[index] != pattern[index - i]) {
+                not_found = true;
+                break;
+            }
+        } // end of inner for loop
+
+        if (not_found == false) { // match is found
+            //printf("\tfps = %ld\n", fps);
+            return (text + i);
+            //return i;
+        }
+
+    } // end of outer for loop
+
+                //printf("\tfps = %ld\n", fps);
+    return NULL;
+    //return -1;
+
+} // end of chal
 
 static int
 test_main (void)
@@ -292,20 +384,21 @@
   for (size_t hlen = 64; hlen <= 256; hlen += 32)
     for (size_t klen = 1; klen <= 16; klen++)
       {
-	do_test (1, 3, hlen, klen, 0);
-	do_test (0, 9, hlen, klen, 1);
+	//do_test (1, 3, hlen, klen, 0);
+	//do_test (0, 9, hlen, klen, 1);
       }
 
-  for (size_t hlen = 256; hlen <= 65536; hlen *= 2)
-    for (size_t klen = 16; klen <= 256; klen *= 2)
+  for (size_t hlen = 1024; hlen <= 65536; hlen *= 2)
+    //for (size_t klen = 16; klen <= 256; klen *= 2)
+    for (size_t klen = 1024; klen <= hlen; klen *= 2)
       {
 	do_test (1, 11, hlen, klen, 0);
 	do_test (14, 5, hlen, klen, 1);
       }
 
-  test_hard_needle (64, 65536);
-  test_hard_needle (256, 65536);
-  test_hard_needle (1024, 65536);
+  //test_hard_needle (64, 65536);
+  //test_hard_needle (256, 65536);
+  //test_hard_needle (1024, 65536);
 
   return ret;
 }

[-- Attachment #4: bench-strstr.out --]
[-- Type: application/octet-stream, Size: 4607 bytes --]

                       	basic_strstr	twoway_strstr	chal	__strstr_sse2_unaligned	__strstr_sse2
Length 1024/1024, alignment  1/11, found:	2089.06	428.125	12255.5	4561.72	429.688
Length 1024/1024, alignment 14/ 5, fail :	4536.72	299.219	9113.28	974.219	224.219
Length 2048/1024, alignment  1/11, found:	9598.44	20886.7	12909.4	3925	20866.4
Length 2048/1024, alignment 14/ 5, fail :	12341.4	18972.7	8628.91	3103.91	19281.2
Length 2048/2048, alignment  1/11, found:	4476.56	625	21861.7	8421.88	651.562
Length 2048/2048, alignment 14/ 5, fail :	14996.9	340.625	18109.4	9892.97	389.844
Length 4096/1024, alignment  1/11, found:	14972.7	22117.2	14968.8	6058.59	22322.7
Length 4096/1024, alignment 14/ 5, fail :	17291.4	20445.3	11021.9	4682.81	20541.4
Length 4096/2048, alignment  1/11, found:	16896.1	39532.8	24222.7	43337.5	40996.9
Length 4096/2048, alignment 14/ 5, fail :	30721.1	39861.7	18486.7	8394.53	38468
Length 4096/4096, alignment  1/11, found:	9270.31	1419.53	50182	16295.3	1282.81
Length 4096/4096, alignment 14/ 5, fail :	37185.9	603.125	11363.3	10475	975
Length 8192/1024, alignment  1/11, found:	79155.5	44248.4	15274.2	17704.7	26135.2
Length 8192/1024, alignment 14/ 5, fail :	71643	21028.9	10257	16943	34669.5
Length 8192/2048, alignment  1/11, found:	59628.9	35382.8	12239.1	15216.4	35798.4
Length 8192/2048, alignment 14/ 5, fail :	37575	24750	7460.16	15131.2	21144.5
Length 8192/4096, alignment  1/11, found:	42871.9	76123.4	33758.6	37295.3	31769.5
Length 8192/4096, alignment 14/ 5, fail :	39203.9	40171.9	10601.6	36032	32105.5
Length 8192/8192, alignment  1/11, found:	13257.8	2188.28	41884.4	22994.5	1785.94
Length 8192/8192, alignment 14/ 5, fail :	43484.4	800.781	22860.9	10253.1	772.656
Length 16384/1024, alignment  1/11, found:	54652.3	23450.8	16033.6	16032	20920.3
Length 16384/1024, alignment 14/ 5, fail :	55950	20818	14721.9	15960.2	19725.8
Length 16384/2048, alignment  1/11, found:	33974.2	30698.4	14841.4	16936.7	29123.4
Length 16384/2048, alignment 14/ 5, fail :	43609.4	54007.8	15832.8	28162.5	31682
Length 16384/4096, alignment  1/11, found:	45509.4	51984.4	25340.6	64178.1	40407
Length 16384/4096, alignment 14/ 5, fail :	44695.3	47563.3	18793	40025.8	38476.6
Length 16384/8192, alignment  1/11, found:	76010.9	88800.8	28921.1	82950	67578.1
Length 16384/8192, alignment 14/ 5, fail :	111872	108791	29732	79808.6	80371.1
Length 16384/16384, alignment  1/11, found:	25663.3	2778.91	54814.1	32831.2	2764.06
Length 16384/16384, alignment 14/ 5, fail :	171498	1650	38649.2	20471.1	1378.12
Length 32768/1024, alignment  1/11, found:	114112	37924.2	24429.7	32809.4	36200
Length 32768/1024, alignment 14/ 5, fail :	115409	35757.8	23677.3	22817.2	35640.6
Length 32768/2048, alignment  1/11, found:	111464	29918	22440.6	57910.2	29357.8
Length 32768/2048, alignment 14/ 5, fail :	122780	38343.8	31703.9	70637.5	28878.9
Length 32768/4096, alignment  1/11, found:	161623	86311.7	40021.1	61573.4	83786.7
Length 32768/4096, alignment 14/ 5, fail :	168648	94081.2	62041.4	57056.2	54064.8
Length 32768/8192, alignment  1/11, found:	190786	153845	51453.9	120277	93224.2
Length 32768/8192, alignment 14/ 5, fail :	224271	137055	50689.1	109388	85260.9
Length 32768/16384, alignment  1/11, found:	291351	238527	59493	162361	138698
Length 32768/16384, alignment 14/ 5, fail :	363070	195028	85438.3	192824	168337
Length 32768/32768, alignment  1/11, found:	33070.3	5542.97	349531	80743.8	6096.88
Length 32768/32768, alignment 14/ 5, fail :	991841	4489.84	288573	73746.9	5169.53
Length 65536/1024, alignment  1/11, found:	260891	61587.5	37722.7	51047.7	90785.9
Length 65536/1024, alignment 14/ 5, fail :	355538	54465.6	27995.3	61132	47860.2
Length 65536/2048, alignment  1/11, found:	252798	80010.9	43000.8	81523.4	91832
Length 65536/2048, alignment 14/ 5, fail :	373905	91524.2	41894.5	75481.2	73662.5
Length 65536/4096, alignment  1/11, found:	338044	104859	70168.8	97125	118416
Length 65536/4096, alignment 14/ 5, fail :	366308	97412.5	65432.8	93324.2	97521.1
Length 65536/8192, alignment  1/11, found:	368997	162164	144940	131199	124479
Length 65536/8192, alignment 14/ 5, fail :	337055	135984	114395	133409	196736
Length 65536/16384, alignment  1/11, found:	525169	509878	372732	450800	444905
Length 65536/16384, alignment 14/ 5, fail :	623478	484677	391644	460172	427140
Length 65536/32768, alignment  1/11, found:	981066	797843	348248	598640	582176
Length 65536/32768, alignment 14/ 5, fail :	897535	570133	469474	537280	496208
Length 65536/65536, alignment  1/11, found:	67847.7	10714.8	646270	151578	10800
Length 65536/65536, alignment 14/ 5, fail :	2.91258e+06	5287.5	552998	152034	9558.59

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-15 20:22 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-15 20:22 [Confirmed for long needles] Fastest String Search Algorithm Amit Choudhary

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).