public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Handle really large number of files in glob_in_dir (BZ#3253)
@ 2006-09-25 11:33 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2006-09-25 11:33 UTC (permalink / raw)
  To: Ulrich Drepper; +Cc: Glibc hackers, Pavel Baudys

Hi!

This is my proposed patch for the BZ#3253 issue.  Allocating a pointer pair
with alloca one at a time is overkill and on some arches really bad
(on some arches each alloca allocates quite a big fixed area in addition
to the requested size), on the other side we want at least some initial
allocations done with alloca for speed and also copying memory around
on realloc would be costly.  So, this patch uses a chain of (decreasingly
big) arrays of pointers, the last few of them allocated with alloca and
the really big ones with malloc.  The last array is even in a local
variable, so that the allocation code can be only in one place (the other
two need just one pointer entry).

2006-09-25  Jakub Jelinek  <jakub@redhat.com>

	[BZ #3253]
	* posix/glob.c (glob_in_dir): Don't alloca one struct globlink at a
	time, rather allocate increasingly bigger arrays of pointers, if
	possible with alloca, if too large with malloc.  Reported by
	Petr Baudys <pasky@suse.cz>.

--- libc/posix/glob.c.jj	2006-01-11 16:55:30.000000000 +0100
+++ libc/posix/glob.c	2006-09-25 13:06:07.000000000 +0200
@@ -1090,16 +1090,20 @@ glob_in_dir (const char *pattern, const 
 {
   size_t dirlen = strlen (directory);
   void *stream = NULL;
-  struct globlink
+  struct globnames
     {
-      struct globlink *next;
-      char *name;
+      struct globnames *next;
+      size_t count;
+      char *name[16];
     };
-  struct globlink *names = NULL;
-  size_t nfound;
+  struct globnames init_names;
+  struct globnames *names = &init_names, *names_alloca = &init_names;
+  size_t nfound = 0, allocasize = sizeof (init_names), cur = 0;
   int meta;
   int save;
 
+  init_names.next = NULL;
+  init_names.count = 16;
   meta = __glob_pattern_p (pattern, !(flags & GLOB_NOESCAPE));
   if (meta == 0 && (flags & (GLOB_NOCHECK|GLOB_NOMAGIC)))
     {
@@ -1107,7 +1111,6 @@ glob_in_dir (const char *pattern, const 
 	 characters and we must not return an error therefore the
 	 result will always contain exactly one name.  */
       flags |= GLOB_NOCHECK;
-      nfound = 0;
     }
   else if (meta == 0 &&
 	   ((flags & GLOB_NOESCAPE) || strchr (pattern, '\\') == NULL))
@@ -1128,8 +1131,6 @@ glob_in_dir (const char *pattern, const 
 	/* We found this file to be existing.  Now tell the rest
 	   of the function to copy this name into the result.  */
 	flags |= GLOB_NOCHECK;
-
-      nfound = 0;
     }
   else
     {
@@ -1137,12 +1138,10 @@ glob_in_dir (const char *pattern, const 
 	{
 	  /* This is a special case for matching directories like in
 	     "*a/".  */
-	  names = (struct globlink *) __alloca (sizeof (struct globlink));
-	  names->name = (char *) malloc (1);
-	  if (names->name == NULL)
+	  names->name[cur] = (char *) malloc (1);
+	  if (names->name[cur] == NULL)
 	    goto memory_error;
-	  names->name[0] = '\0';
-	  names->next = NULL;
+	  *names->name[cur++] = '\0';
 	  nfound = 1;
 	  meta = 0;
 	}
@@ -1157,7 +1156,6 @@ glob_in_dir (const char *pattern, const 
 		  && ((errfunc != NULL && (*errfunc) (directory, errno))
 		      || (flags & GLOB_ERR)))
 		return GLOB_ABORTED;
-	      nfound = 0;
 	      meta = 0;
 	    }
 	  else
@@ -1168,7 +1166,6 @@ glob_in_dir (const char *pattern, const 
 			       | FNM_CASEFOLD
 #endif
 			       );
-	      nfound = 0;
 	      flags |= GLOB_MAGCHAR;
 
 	      while (1)
@@ -1224,15 +1221,29 @@ glob_in_dir (const char *pattern, const 
 			  || link_exists_p (directory, dirlen, name, pglob,
 					    flags))
 			{
-			  struct globlink *new = (struct globlink *)
-			    __alloca (sizeof (struct globlink));
+			  if (cur == names->count)
+			    {
+			      struct globnames *newnames;
+			      size_t count = names->count * 2;
+			      size_t size = sizeof (struct globnames)
+					    + (count - 16) * sizeof (char *);
+			      allocasize += size;
+			      if (__libc_use_alloca (allocasize))
+				newnames = names_alloca = __alloca (size);
+			      else if ((newnames = malloc (size))
+				       == NULL)
+				goto memory_error;
+			      newnames->count = count;
+			      newnames->next = names;
+			      names = newnames;
+			      cur = 0;
+			    }
 			  len = NAMLEN (d);
-			  new->name = (char *) malloc (len + 1);
-			  if (new->name == NULL)
+			  names->name[cur] = (char *) malloc (len + 1);
+			  if (names->name[cur] == NULL)
 			    goto memory_error;
-			  *((char *) mempcpy (new->name, name, len)) = '\0';
-			  new->next = names;
-			  names = new;
+			  *((char *) mempcpy (names->name[cur++], name, len))
+			    = '\0';
 			  ++nfound;
 			}
 		    }
@@ -1245,12 +1256,10 @@ glob_in_dir (const char *pattern, const 
     {
       size_t len = strlen (pattern);
       nfound = 1;
-      names = (struct globlink *) __alloca (sizeof (struct globlink));
-      names->next = NULL;
-      names->name = (char *) malloc (len + 1);
-      if (names->name == NULL)
+      names->name[cur] = (char *) malloc (len + 1);
+      if (names->name[cur] == NULL)
 	goto memory_error;
-      *((char *) mempcpy (names->name, pattern, len)) = '\0';
+      *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0';
     }
 
   if (nfound != 0)
@@ -1265,8 +1274,23 @@ glob_in_dir (const char *pattern, const 
 	goto memory_error;
       pglob->gl_pathv = new_gl_pathv;
 
-      for (; names != NULL; names = names->next)
-	pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] = names->name;
+      while (1)
+	{
+	  size_t i;
+	  struct globnames *old = names;
+	  for (i = 0; i < cur; ++i)
+	    pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++]
+	      = names->name[i];
+	  names = names->next;
+	  if (names == NULL)
+	    break;
+	  cur = names->count;
+	  if (old == names_alloca)
+	    names_alloca = names;
+	  else
+	    free (old);
+	}
+
       pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
 
       pglob->gl_flags = flags;
@@ -1293,11 +1317,20 @@ glob_in_dir (const char *pattern, const 
       closedir (stream);
     __set_errno (save);
   }
-  while (names != NULL)
+  while (1)
     {
-      if (names->name != NULL)
-	free (names->name);
+      size_t i;
+      struct globnames *old = names;
+      for (i = 0; i < cur; ++i)
+	free (names->name[i]);
       names = names->next;
+      if (names == NULL)
+	break;
+      cur = names->count;
+      if (old == names_alloca)
+	names_alloca = names;
+      else
+	free (old);
     }
   return GLOB_NOSPACE;
 }

	Jakub

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2006-09-25 11:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-25 11:33 [PATCH] Handle really large number of files in glob_in_dir (BZ#3253) Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).