From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 23322 invoked by alias); 25 Sep 2006 11:33:09 -0000 Received: (qmail 23306 invoked by uid 22791); 25 Sep 2006 11:33:08 -0000 X-Spam-Check-By: sourceware.org Received: from sunsite.ms.mff.cuni.cz (HELO sunsite.mff.cuni.cz) (195.113.15.26) by sourceware.org (qpsmtpd/0.31) with ESMTP; Mon, 25 Sep 2006 11:33:01 +0000 Received: from sunsite.mff.cuni.cz (sunsite.mff.cuni.cz [127.0.0.1]) by sunsite.mff.cuni.cz (8.13.1/8.13.1) with ESMTP id k8PBWp7Q027124; Mon, 25 Sep 2006 13:32:51 +0200 Received: (from jj@localhost) by sunsite.mff.cuni.cz (8.13.1/8.13.1/Submit) id k8PBWoEV027123; Mon, 25 Sep 2006 13:32:50 +0200 Date: Mon, 25 Sep 2006 11:33:00 -0000 From: Jakub Jelinek To: Ulrich Drepper Cc: Glibc hackers , Pavel Baudys Subject: [PATCH] Handle really large number of files in glob_in_dir (BZ#3253) Message-ID: <20060925113250.GX4556@sunsite.mff.cuni.cz> Reply-To: Jakub Jelinek Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.4.1i Mailing-List: contact libc-hacker-help@sourceware.org; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-hacker-owner@sourceware.org X-SW-Source: 2006-09/txt/msg00050.txt.bz2 Hi! This is my proposed patch for the BZ#3253 issue. Allocating a pointer pair with alloca one at a time is overkill and on some arches really bad (on some arches each alloca allocates quite a big fixed area in addition to the requested size), on the other side we want at least some initial allocations done with alloca for speed and also copying memory around on realloc would be costly. So, this patch uses a chain of (decreasingly big) arrays of pointers, the last few of them allocated with alloca and the really big ones with malloc. The last array is even in a local variable, so that the allocation code can be only in one place (the other two need just one pointer entry). 2006-09-25 Jakub Jelinek [BZ #3253] * posix/glob.c (glob_in_dir): Don't alloca one struct globlink at a time, rather allocate increasingly bigger arrays of pointers, if possible with alloca, if too large with malloc. Reported by Petr Baudys . --- libc/posix/glob.c.jj 2006-01-11 16:55:30.000000000 +0100 +++ libc/posix/glob.c 2006-09-25 13:06:07.000000000 +0200 @@ -1090,16 +1090,20 @@ glob_in_dir (const char *pattern, const { size_t dirlen = strlen (directory); void *stream = NULL; - struct globlink + struct globnames { - struct globlink *next; - char *name; + struct globnames *next; + size_t count; + char *name[16]; }; - struct globlink *names = NULL; - size_t nfound; + struct globnames init_names; + struct globnames *names = &init_names, *names_alloca = &init_names; + size_t nfound = 0, allocasize = sizeof (init_names), cur = 0; int meta; int save; + init_names.next = NULL; + init_names.count = 16; meta = __glob_pattern_p (pattern, !(flags & GLOB_NOESCAPE)); if (meta == 0 && (flags & (GLOB_NOCHECK|GLOB_NOMAGIC))) { @@ -1107,7 +1111,6 @@ glob_in_dir (const char *pattern, const characters and we must not return an error therefore the result will always contain exactly one name. */ flags |= GLOB_NOCHECK; - nfound = 0; } else if (meta == 0 && ((flags & GLOB_NOESCAPE) || strchr (pattern, '\\') == NULL)) @@ -1128,8 +1131,6 @@ glob_in_dir (const char *pattern, const /* We found this file to be existing. Now tell the rest of the function to copy this name into the result. */ flags |= GLOB_NOCHECK; - - nfound = 0; } else { @@ -1137,12 +1138,10 @@ glob_in_dir (const char *pattern, const { /* This is a special case for matching directories like in "*a/". */ - names = (struct globlink *) __alloca (sizeof (struct globlink)); - names->name = (char *) malloc (1); - if (names->name == NULL) + names->name[cur] = (char *) malloc (1); + if (names->name[cur] == NULL) goto memory_error; - names->name[0] = '\0'; - names->next = NULL; + *names->name[cur++] = '\0'; nfound = 1; meta = 0; } @@ -1157,7 +1156,6 @@ glob_in_dir (const char *pattern, const && ((errfunc != NULL && (*errfunc) (directory, errno)) || (flags & GLOB_ERR))) return GLOB_ABORTED; - nfound = 0; meta = 0; } else @@ -1168,7 +1166,6 @@ glob_in_dir (const char *pattern, const | FNM_CASEFOLD #endif ); - nfound = 0; flags |= GLOB_MAGCHAR; while (1) @@ -1224,15 +1221,29 @@ glob_in_dir (const char *pattern, const || link_exists_p (directory, dirlen, name, pglob, flags)) { - struct globlink *new = (struct globlink *) - __alloca (sizeof (struct globlink)); + if (cur == names->count) + { + struct globnames *newnames; + size_t count = names->count * 2; + size_t size = sizeof (struct globnames) + + (count - 16) * sizeof (char *); + allocasize += size; + if (__libc_use_alloca (allocasize)) + newnames = names_alloca = __alloca (size); + else if ((newnames = malloc (size)) + == NULL) + goto memory_error; + newnames->count = count; + newnames->next = names; + names = newnames; + cur = 0; + } len = NAMLEN (d); - new->name = (char *) malloc (len + 1); - if (new->name == NULL) + names->name[cur] = (char *) malloc (len + 1); + if (names->name[cur] == NULL) goto memory_error; - *((char *) mempcpy (new->name, name, len)) = '\0'; - new->next = names; - names = new; + *((char *) mempcpy (names->name[cur++], name, len)) + = '\0'; ++nfound; } } @@ -1245,12 +1256,10 @@ glob_in_dir (const char *pattern, const { size_t len = strlen (pattern); nfound = 1; - names = (struct globlink *) __alloca (sizeof (struct globlink)); - names->next = NULL; - names->name = (char *) malloc (len + 1); - if (names->name == NULL) + names->name[cur] = (char *) malloc (len + 1); + if (names->name[cur] == NULL) goto memory_error; - *((char *) mempcpy (names->name, pattern, len)) = '\0'; + *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0'; } if (nfound != 0) @@ -1265,8 +1274,23 @@ glob_in_dir (const char *pattern, const goto memory_error; pglob->gl_pathv = new_gl_pathv; - for (; names != NULL; names = names->next) - pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] = names->name; + while (1) + { + size_t i; + struct globnames *old = names; + for (i = 0; i < cur; ++i) + pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] + = names->name[i]; + names = names->next; + if (names == NULL) + break; + cur = names->count; + if (old == names_alloca) + names_alloca = names; + else + free (old); + } + pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; pglob->gl_flags = flags; @@ -1293,11 +1317,20 @@ glob_in_dir (const char *pattern, const closedir (stream); __set_errno (save); } - while (names != NULL) + while (1) { - if (names->name != NULL) - free (names->name); + size_t i; + struct globnames *old = names; + for (i = 0; i < cur; ++i) + free (names->name[i]); names = names->next; + if (names == NULL) + break; + cur = names->count; + if (old == names_alloca) + names_alloca = names; + else + free (old); } return GLOB_NOSPACE; } Jakub