* [PATCH] Handle really large number of files in glob_in_dir (BZ#3253)
@ 2006-09-25 11:33 Jakub Jelinek
0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2006-09-25 11:33 UTC (permalink / raw)
To: Ulrich Drepper; +Cc: Glibc hackers, Pavel Baudys
Hi!
This is my proposed patch for the BZ#3253 issue. Calling alloca once per
matched name to allocate a single pointer pair (struct globlink) is overkill,
and on some arches it is really bad (on those arches each alloca allocates
quite a big fixed area in addition to the requested size). On the other hand,
we want at least the initial allocations done with alloca for speed, and
copying memory around on realloc would be costly. So this patch uses a chain
of arrays of pointers, where each newly added array is twice as big as the
previous one (walked from its head, the chain is therefore decreasingly big).
The last few arrays in the chain (the earliest, smallest ones) are allocated
with alloca and the really big ones with malloc. The very last array in the
chain is even a local variable, so that the allocation code needs to be in
only one place (the other two places that store a name need just one pointer
entry each).
2006-09-25 Jakub Jelinek <jakub@redhat.com>
[BZ #3253]
* posix/glob.c (glob_in_dir): Don't alloca one struct globlink at a
time, rather allocate increasingly bigger arrays of pointers, if
possible with alloca, if too large with malloc. Reported by
Petr Baudys <pasky@suse.cz>.
--- libc/posix/glob.c.jj 2006-01-11 16:55:30.000000000 +0100
+++ libc/posix/glob.c 2006-09-25 13:06:07.000000000 +0200
@@ -1090,16 +1090,20 @@ glob_in_dir (const char *pattern, const
{
size_t dirlen = strlen (directory);
void *stream = NULL;
- struct globlink
+ struct globnames
{
- struct globlink *next;
- char *name;
+ struct globnames *next;
+ size_t count;
+ char *name[16];
};
- struct globlink *names = NULL;
- size_t nfound;
+ struct globnames init_names;
+ struct globnames *names = &init_names, *names_alloca = &init_names;
+ size_t nfound = 0, allocasize = sizeof (init_names), cur = 0;
int meta;
int save;
+ init_names.next = NULL;
+ init_names.count = 16;
meta = __glob_pattern_p (pattern, !(flags & GLOB_NOESCAPE));
if (meta == 0 && (flags & (GLOB_NOCHECK|GLOB_NOMAGIC)))
{
@@ -1107,7 +1111,6 @@ glob_in_dir (const char *pattern, const
characters and we must not return an error therefore the
result will always contain exactly one name. */
flags |= GLOB_NOCHECK;
- nfound = 0;
}
else if (meta == 0 &&
((flags & GLOB_NOESCAPE) || strchr (pattern, '\\') == NULL))
@@ -1128,8 +1131,6 @@ glob_in_dir (const char *pattern, const
/* We found this file to be existing. Now tell the rest
of the function to copy this name into the result. */
flags |= GLOB_NOCHECK;
-
- nfound = 0;
}
else
{
@@ -1137,12 +1138,10 @@ glob_in_dir (const char *pattern, const
{
/* This is a special case for matching directories like in
"*a/". */
- names = (struct globlink *) __alloca (sizeof (struct globlink));
- names->name = (char *) malloc (1);
- if (names->name == NULL)
+ names->name[cur] = (char *) malloc (1);
+ if (names->name[cur] == NULL)
goto memory_error;
- names->name[0] = '\0';
- names->next = NULL;
+ *names->name[cur++] = '\0';
nfound = 1;
meta = 0;
}
@@ -1157,7 +1156,6 @@ glob_in_dir (const char *pattern, const
&& ((errfunc != NULL && (*errfunc) (directory, errno))
|| (flags & GLOB_ERR)))
return GLOB_ABORTED;
- nfound = 0;
meta = 0;
}
else
@@ -1168,7 +1166,6 @@ glob_in_dir (const char *pattern, const
| FNM_CASEFOLD
#endif
);
- nfound = 0;
flags |= GLOB_MAGCHAR;
while (1)
@@ -1224,15 +1221,29 @@ glob_in_dir (const char *pattern, const
|| link_exists_p (directory, dirlen, name, pglob,
flags))
{
- struct globlink *new = (struct globlink *)
- __alloca (sizeof (struct globlink));
+ if (cur == names->count)
+ {
+ struct globnames *newnames;
+ size_t count = names->count * 2;
+ size_t size = sizeof (struct globnames)
+ + (count - 16) * sizeof (char *);
+ allocasize += size;
+ if (__libc_use_alloca (allocasize))
+ newnames = names_alloca = __alloca (size);
+ else if ((newnames = malloc (size))
+ == NULL)
+ goto memory_error;
+ newnames->count = count;
+ newnames->next = names;
+ names = newnames;
+ cur = 0;
+ }
len = NAMLEN (d);
- new->name = (char *) malloc (len + 1);
- if (new->name == NULL)
+ names->name[cur] = (char *) malloc (len + 1);
+ if (names->name[cur] == NULL)
goto memory_error;
- *((char *) mempcpy (new->name, name, len)) = '\0';
- new->next = names;
- names = new;
+ *((char *) mempcpy (names->name[cur++], name, len))
+ = '\0';
++nfound;
}
}
@@ -1245,12 +1256,10 @@ glob_in_dir (const char *pattern, const
{
size_t len = strlen (pattern);
nfound = 1;
- names = (struct globlink *) __alloca (sizeof (struct globlink));
- names->next = NULL;
- names->name = (char *) malloc (len + 1);
- if (names->name == NULL)
+ names->name[cur] = (char *) malloc (len + 1);
+ if (names->name[cur] == NULL)
goto memory_error;
- *((char *) mempcpy (names->name, pattern, len)) = '\0';
+ *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0';
}
if (nfound != 0)
@@ -1265,8 +1274,23 @@ glob_in_dir (const char *pattern, const
goto memory_error;
pglob->gl_pathv = new_gl_pathv;
- for (; names != NULL; names = names->next)
- pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] = names->name;
+ while (1)
+ {
+ size_t i;
+ struct globnames *old = names;
+ for (i = 0; i < cur; ++i)
+ pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++]
+ = names->name[i];
+ names = names->next;
+ if (names == NULL)
+ break;
+ cur = names->count;
+ if (old == names_alloca)
+ names_alloca = names;
+ else
+ free (old);
+ }
+
pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
pglob->gl_flags = flags;
@@ -1293,11 +1317,20 @@ glob_in_dir (const char *pattern, const
closedir (stream);
__set_errno (save);
}
- while (names != NULL)
+ while (1)
{
- if (names->name != NULL)
- free (names->name);
+ size_t i;
+ struct globnames *old = names;
+ for (i = 0; i < cur; ++i)
+ free (names->name[i]);
names = names->next;
+ if (names == NULL)
+ break;
+ cur = names->count;
+ if (old == names_alloca)
+ names_alloca = names;
+ else
+ free (old);
}
return GLOB_NOSPACE;
}
Jakub
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2006-09-25 11:33 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-25 11:33 [PATCH] Handle really large number of files in glob_in_dir (BZ#3253) Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).