public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [Code] Fastest String Search Algorithm.
@ 2021-06-13  4:43 Amit Choudhary
  0 siblings, 0 replies; only message in thread
From: Amit Choudhary @ 2021-06-13  4:43 UTC (permalink / raw)
  To: GNU C Library

I am sending my code not for the purpose of getting it included in glibc.

The sole purpose is that the glibc mailing list is public and is
indexed by search engines, so people outside the glibc community can find
this algorithm when they search the web for string search algorithms.

=====================================================================

// Choudhary string search algorithm
//
// Finds the first occurrence of the NUL-terminated string `pattern` inside
// the NUL-terminated string `text`.
//
// Returns a pointer to the start of the first match within `text`, `text`
// itself when `pattern` is empty (same convention as strstr), or NULL when
// there is no match. Neither argument may be NULL.
//
// Strategy: for every candidate start position the pattern is first probed
// at a handful of cheap positions (the index that failed most recently, then
// the last, 3/4, 1/2 and 1/4 positions) before a full left-to-right
// comparison is attempted. Whenever the mismatching text byte does not occur
// anywhere in the pattern, no window that covers that byte can match, so the
// search jumps straight past it.
static char * choudhary_string_search_algorithm(char *text, char *pattern)
{
    enum { ALPHABET_SIZE = 256, PROBE_COUNT = 4 };

    // Bytes are compared and used as table indices as unsigned char so that
    // values >= 0x80 never produce a negative index when plain char is signed.
    const unsigned char *t = (const unsigned char *)text;
    const unsigned char *p = (const unsigned char *)pattern;

    // pattern_char[c] is non-zero iff byte value c occurs in the pattern.
    unsigned char pattern_char[ALPHABET_SIZE] = {0};

    size_t text_len = strlen(text);
    size_t pattern_len = strlen(pattern);

    // An empty pattern matches at the very beginning; without this guard the
    // "last character" probe index would be pattern_len - 1 == (size_t)-1.
    if (pattern_len == 0) {
        return text;
    }

    if (pattern_len > text_len) {
        return NULL;
    }

    // Probe positions inside the pattern: last, 3/4, 1/2 and 1/4.
    // The 3/4 index is floor(3 * pattern_len / 4), written so that the
    // multiplication cannot overflow.
    size_t probes[PROBE_COUNT];
    probes[0] = pattern_len - 1;
    probes[1] = 3 * (pattern_len / 4) + (3 * (pattern_len % 4)) / 4;
    probes[2] = pattern_len / 2;
    probes[3] = pattern_len / 4;

    // preprocessing
    for (size_t k = 0; k < pattern_len; k++) {
        pattern_char[p[k]] = 1;
    }

    // Last start position at which the whole pattern still fits in the text.
    size_t last_start = text_len - pattern_len;

    // Pattern index of the most recent mismatch; only meaningful once
    // have_last_failed is set.
    size_t last_failed_index = 0;
    int have_last_failed = 0;

    // now search
    for (size_t i = 0; i <= last_start; i++) {

        const unsigned char *window = t + i;

        // Cheapest rejection first: the position that failed last time.
        // A match here proves nothing, so the remaining checks still run.
        if (have_last_failed
            && p[last_failed_index] != window[last_failed_index]) {
            continue;
        }

        // Probe the fixed positions before paying for a full comparison.
        int rejected = 0;
        for (size_t k = 0; k < PROBE_COUNT; k++) {
            size_t probe = probes[k];

            if (p[probe] != window[probe]) {
                last_failed_index = probe;
                have_last_failed = 1;

                // This text byte doesn't appear in the pattern, so every
                // window covering it fails; the loop's i++ then lands on the
                // first start position after it.
                if (pattern_char[window[probe]] == 0) {
                    i += probe;
                }

                rejected = 1;
                break;
            }
        }
        if (rejected) {
            continue;
        }

        // Full left-to-right comparison of the window against the pattern.
        size_t j = 0;
        while (j < pattern_len && p[j] == window[j]) {
            j++;
        }

        if (j == pattern_len) { // match is found
            return text + i;
        }

        last_failed_index = j;
        have_last_failed = 1;

        // Same skip rule as above, applied to the byte that broke the
        // full comparison.
        if (pattern_char[window[j]] == 0) {
            i += j;
        }

    } // end of outer for loop

    return NULL;

} // end of choudhary_string_search_algorithm

=====================================================================

Amit

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-13  4:44 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-13  4:43 [Code] Fastest String Search Algorithm Amit Choudhary

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).