public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: Ulrich Drepper <drepper@redhat.com>
Cc: Glibc hackers <libc-hacker@sources.redhat.com>,
	Pavel Baudys <pasky@suse.cz>
Subject: [PATCH] Handle really large number of files in glob_in_dir (BZ#3253)
Date: Mon, 25 Sep 2006 11:33:00 -0000	[thread overview]
Message-ID: <20060925113250.GX4556@sunsite.mff.cuni.cz> (raw)

Hi!

This is my proposed patch for the BZ#3253 issue.  Allocating a pointer pair
with alloca one at a time is overkill and on some arches really bad
(on some arches each alloca allocates quite a big fixed area in addition
to the requested size), on the other side we want at least some initial
allocations done with alloca for speed and also copying memory around
on realloc would be costly.  So, this patch uses a chain of (decreasingly
big) arrays of pointers, the last few of them allocated with alloca and
the really big ones with malloc.  The last array is even in a local
variable, so that the allocation code can be only in one place (the other
two need just one pointer entry).

2006-09-25  Jakub Jelinek  <jakub@redhat.com>

	[BZ #3253]
	* posix/glob.c (glob_in_dir): Don't alloca one struct globlink at a
	time, rather allocate increasingly bigger arrays of pointers, if
	possible with alloca, if too large with malloc.  Reported by
	Petr Baudys <pasky@suse.cz>.

--- libc/posix/glob.c.jj	2006-01-11 16:55:30.000000000 +0100
+++ libc/posix/glob.c	2006-09-25 13:06:07.000000000 +0200
@@ -1090,16 +1090,20 @@ glob_in_dir (const char *pattern, const 
 {
   size_t dirlen = strlen (directory);
   void *stream = NULL;
-  struct globlink
+  struct globnames
     {
-      struct globlink *next;
-      char *name;
+      struct globnames *next;
+      size_t count;
+      char *name[16];
     };
-  struct globlink *names = NULL;
-  size_t nfound;
+  struct globnames init_names;
+  struct globnames *names = &init_names, *names_alloca = &init_names;
+  size_t nfound = 0, allocasize = sizeof (init_names), cur = 0;
   int meta;
   int save;
 
+  init_names.next = NULL;
+  init_names.count = 16;
   meta = __glob_pattern_p (pattern, !(flags & GLOB_NOESCAPE));
   if (meta == 0 && (flags & (GLOB_NOCHECK|GLOB_NOMAGIC)))
     {
@@ -1107,7 +1111,6 @@ glob_in_dir (const char *pattern, const 
 	 characters and we must not return an error therefore the
 	 result will always contain exactly one name.  */
       flags |= GLOB_NOCHECK;
-      nfound = 0;
     }
   else if (meta == 0 &&
 	   ((flags & GLOB_NOESCAPE) || strchr (pattern, '\\') == NULL))
@@ -1128,8 +1131,6 @@ glob_in_dir (const char *pattern, const 
 	/* We found this file to be existing.  Now tell the rest
 	   of the function to copy this name into the result.  */
 	flags |= GLOB_NOCHECK;
-
-      nfound = 0;
     }
   else
     {
@@ -1137,12 +1138,10 @@ glob_in_dir (const char *pattern, const 
 	{
 	  /* This is a special case for matching directories like in
 	     "*a/".  */
-	  names = (struct globlink *) __alloca (sizeof (struct globlink));
-	  names->name = (char *) malloc (1);
-	  if (names->name == NULL)
+	  names->name[cur] = (char *) malloc (1);
+	  if (names->name[cur] == NULL)
 	    goto memory_error;
-	  names->name[0] = '\0';
-	  names->next = NULL;
+	  *names->name[cur++] = '\0';
 	  nfound = 1;
 	  meta = 0;
 	}
@@ -1157,7 +1156,6 @@ glob_in_dir (const char *pattern, const 
 		  && ((errfunc != NULL && (*errfunc) (directory, errno))
 		      || (flags & GLOB_ERR)))
 		return GLOB_ABORTED;
-	      nfound = 0;
 	      meta = 0;
 	    }
 	  else
@@ -1168,7 +1166,6 @@ glob_in_dir (const char *pattern, const 
 			       | FNM_CASEFOLD
 #endif
 			       );
-	      nfound = 0;
 	      flags |= GLOB_MAGCHAR;
 
 	      while (1)
@@ -1224,15 +1221,29 @@ glob_in_dir (const char *pattern, const 
 			  || link_exists_p (directory, dirlen, name, pglob,
 					    flags))
 			{
-			  struct globlink *new = (struct globlink *)
-			    __alloca (sizeof (struct globlink));
+			  if (cur == names->count)
+			    {
+			      struct globnames *newnames;
+			      size_t count = names->count * 2;
+			      size_t size = sizeof (struct globnames)
+					    + (count - 16) * sizeof (char *);
+			      allocasize += size;
+			      if (__libc_use_alloca (allocasize))
+				newnames = names_alloca = __alloca (size);
+			      else if ((newnames = malloc (size))
+				       == NULL)
+				goto memory_error;
+			      newnames->count = count;
+			      newnames->next = names;
+			      names = newnames;
+			      cur = 0;
+			    }
 			  len = NAMLEN (d);
-			  new->name = (char *) malloc (len + 1);
-			  if (new->name == NULL)
+			  names->name[cur] = (char *) malloc (len + 1);
+			  if (names->name[cur] == NULL)
 			    goto memory_error;
-			  *((char *) mempcpy (new->name, name, len)) = '\0';
-			  new->next = names;
-			  names = new;
+			  *((char *) mempcpy (names->name[cur++], name, len))
+			    = '\0';
 			  ++nfound;
 			}
 		    }
@@ -1245,12 +1256,10 @@ glob_in_dir (const char *pattern, const 
     {
       size_t len = strlen (pattern);
       nfound = 1;
-      names = (struct globlink *) __alloca (sizeof (struct globlink));
-      names->next = NULL;
-      names->name = (char *) malloc (len + 1);
-      if (names->name == NULL)
+      names->name[cur] = (char *) malloc (len + 1);
+      if (names->name[cur] == NULL)
 	goto memory_error;
-      *((char *) mempcpy (names->name, pattern, len)) = '\0';
+      *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0';
     }
 
   if (nfound != 0)
@@ -1265,8 +1274,23 @@ glob_in_dir (const char *pattern, const 
 	goto memory_error;
       pglob->gl_pathv = new_gl_pathv;
 
-      for (; names != NULL; names = names->next)
-	pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] = names->name;
+      while (1)
+	{
+	  size_t i;
+	  struct globnames *old = names;
+	  for (i = 0; i < cur; ++i)
+	    pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++]
+	      = names->name[i];
+	  names = names->next;
+	  if (names == NULL)
+	    break;
+	  cur = names->count;
+	  if (old == names_alloca)
+	    names_alloca = names;
+	  else
+	    free (old);
+	}
+
       pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
 
       pglob->gl_flags = flags;
@@ -1293,11 +1317,20 @@ glob_in_dir (const char *pattern, const 
       closedir (stream);
     __set_errno (save);
   }
-  while (names != NULL)
+  while (1)
     {
-      if (names->name != NULL)
-	free (names->name);
+      size_t i;
+      struct globnames *old = names;
+      for (i = 0; i < cur; ++i)
+	free (names->name[i]);
       names = names->next;
+      if (names == NULL)
+	break;
+      cur = names->count;
+      if (old == names_alloca)
+	names_alloca = names;
+      else
+	free (old);
     }
   return GLOB_NOSPACE;
 }

	Jakub

                 reply	other threads:[~2006-09-25 11:33 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060925113250.GX4556@sunsite.mff.cuni.cz \
    --to=jakub@redhat.com \
    --cc=drepper@redhat.com \
    --cc=libc-hacker@sources.redhat.com \
    --cc=pasky@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).