public inbox for glibc-bugs-regex@sourceware.org
help / color / mirror / Atom feed
* [Bug regex/544] New: Even unneeded OP_{OPEN,CLOSE}_SUBEXP nodes slow regexec down a lot
@ 2004-11-12 16:39 jakub at redhat dot com
2004-11-19 10:28 ` [Bug regex/544] " jakub at redhat dot com
0 siblings, 1 reply; 2+ messages in thread
From: jakub at redhat dot com @ 2004-11-12 16:39 UTC (permalink / raw)
To: glibc-bugs-regex
#include <fcntl.h>
#include <locale.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
/* Time regexec over the contents of ../ChangeLog.14 for two patterns:
   a plain ".?.?.?.?.?.?.?abcde" and a variant in which each ".?" is
   wrapped in ten nested groups.  Both are compiled with
   REG_EXTENDED | REG_NOSUB and neither is expected to match the file.

   Prints the elapsed wall-clock time for each pattern.

   Returns 0 on success, the regcomp error code if a pattern fails to
   compile, and 1 on any other failure (I/O error, allocation failure,
   or regexec returning anything other than REG_NOMATCH).  */
static int
do_test (void)
{
  static const char *pat[] = {
    ".?.?.?.?.?.?.?abcde",
    "((((((((((.?))))))))))((((((((((.?))))))))))((((((((((.?))))))))))"
    "((((((((((.?))))))))))((((((((((.?))))))))))((((((((((.?))))))))))"
    "((((((((((.?))))))))))abcde" };

  int result = 1;
  char *buf = NULL;

  int fd = open ("../ChangeLog.14", O_RDONLY);
  if (fd < 0)
    {
      printf ("Couldn't open ChangeLog.14: %m\n");
      return 1;
    }

  struct stat64 st;
  if (fstat64 (fd, &st) < 0)
    {
      printf ("Couldn't fstat ChangeLog.14: %m\n");
      goto out;
    }

  /* One extra byte so the file contents can be NUL-terminated and
     handed to regexec as a C string.  */
  buf = malloc (st.st_size + 1);
  if (buf == NULL)
    {
      printf ("Couldn't allocate buffer: %m\n");
      goto out;
    }

  if (read (fd, buf, st.st_size) != (ssize_t) st.st_size)
    {
      puts ("Couldn't read ChangeLog.14");
      goto out;
    }

  /* The descriptor is no longer needed; mark it closed so the common
     exit path below does not close it a second time.  */
  close (fd);
  fd = -1;
  buf[st.st_size] = '\0';

  setlocale (LC_ALL, "de_DE.UTF-8");

  for (size_t i = 0; i < sizeof (pat) / sizeof (pat[0]); ++i)
    {
      printf ("pattern %s", pat[i]);

      regex_t rbuf;
      int err = regcomp (&rbuf, pat[i], REG_EXTENDED | REG_NOSUB);
      if (err != 0)
	{
	  putchar ('\n');
	  char errstr[300];
	  regerror (err, &rbuf, errstr, sizeof (errstr));
	  puts (errstr);
	  result = err;
	  goto out;
	}

      struct timeval start, stop;
      gettimeofday (&start, NULL);
      err = regexec (&rbuf, buf, 0, NULL, 0);
      gettimeofday (&stop, NULL);

      /* The compiled pattern is not used past this point, so release
	 it before any early exit can leak it.  */
      regfree (&rbuf);

      if (err != REG_NOMATCH)
	{
	  puts ("\nregexec unexpectedly matched");
	  goto out;
	}

      /* stop -= start, borrowing one second when the microsecond
	 field would otherwise go negative.  */
      stop.tv_sec -= start.tv_sec;
      if (stop.tv_usec < start.tv_usec)
	{
	  stop.tv_sec--;
	  stop.tv_usec += 1000000 - start.tv_usec;
	}
      else
	stop.tv_usec -= start.tv_usec;

      printf (": %ld.%06lds\n", (long) stop.tv_sec, (long) stop.tv_usec);
    }

  result = 0;

 out:
  free (buf);
  if (fd >= 0)
    close (fd);
  return result;
}
/* Configuration for the shared test driver included below.
   NOTE(review): presumably test-skeleton.c supplies main, invokes
   TEST_FUNCTION and enforces TIMEOUT (likely in seconds) -- confirm
   against that file, which is not visible here.  */
#define TIMEOUT 10
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
There is no reason why the second regexec should be any slower than the first
one, yet on my box the second regexec is about 6 times slower than the first one.
I'll look into what can be done. I hope regcomp can keep those nodes from being
seen by regexec when they aren't needed for backreferences and REG_NOSUB is in
effect, or when they are nested with no intervening tokens in between (e.g.
'((x))') and we can tell the final match computation that, say, match 0 is
identical to match 1.
--
Summary: Even unneeded OP_{OPEN,CLOSE}_SUBEXP nodes slow regexec
down a lot
Product: glibc
Version: unspecified
Status: NEW
Severity: normal
Priority: P2
Component: regex
AssignedTo: gotom at debian dot or dot jp
ReportedBy: jakub at redhat dot com
CC: glibc-bugs-regex at sources dot redhat dot com,
glibc-bugs at sources dot redhat dot com
http://sources.redhat.com/bugzilla/show_bug.cgi?id=544
------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.
^ permalink raw reply [flat|nested] 2+ messages in thread
* [Bug regex/544] Even unneeded OP_{OPEN,CLOSE}_SUBEXP nodes slow regexec down a lot
2004-11-12 16:39 [Bug regex/544] New: Even unneeded OP_{OPEN,CLOSE}_SUBEXP nodes slow regexec down a lot jakub at redhat dot com
@ 2004-11-19 10:28 ` jakub at redhat dot com
0 siblings, 0 replies; 2+ messages in thread
From: jakub at redhat dot com @ 2004-11-19 10:28 UTC (permalink / raw)
To: glibc-bugs-regex
------- Additional Comments From jakub at redhat dot com 2004-11-19 10:28 -------
This should be fixed in CVS now.
--
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution| |FIXED
http://sources.redhat.com/bugzilla/show_bug.cgi?id=544
------- You are receiving this mail because: -------
You are on the CC list for the bug, or are watching someone who is.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2004-11-19 10:28 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-11-12 16:39 [Bug regex/544] New: Even unneeded OP_{OPEN,CLOSE}_SUBEXP nodes slow regexec down a lot jakub at redhat dot com
2004-11-19 10:28 ` [Bug regex/544] " jakub at redhat dot com
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).