public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
From: Jakub Jelinek <jakub@redhat.com>
To: Ulrich Drepper <drepper@redhat.com>, Roland McGrath <roland@redhat.com>
Cc: Glibc hackers <libc-hacker@sources.redhat.com>
Subject: [PATCH] Fix regex memory handling
Date: Wed, 19 Nov 2003 16:56:00 -0000	[thread overview]
Message-ID: <20031119132031.GL12344@sunsite.ms.mff.cuni.cz> (raw)

Hi!

regcomp.c had a lot of places where it could either leak memory or
double free.  This patch tries to fix this.  In addition to this
it reduces (in my build) regex.os's .text section by 3.3KB (from 71KB)
and reduces number of malloc/free/realloc calls in tst-rxspencer
(wc -l tst-rxspencer.mtrace) from 98080 to 83305.

2003-11-19  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (free_bin_tree): Removed.
	(create_tree): Add dfa argument.  Don't call re_malloc for
	each tree, instead allocate from str_tree_storage.
	(re_dfa_add_tree_node): New function.
	(free_dfa_content): Handle freeing if dfa->nodes == NULL
	or dfa->state_table == NULL.
	(re_compile_internal): Call free_dfa_content if init_dfa
	fails.  Call free_workarea_compile, re_string_destruct
	and free_dfa_content for most of the other failure paths.
	(init_dfa): Initialize str_tree_storage_idx.
	Don't clear any fields on allocation failure.
	(free_workarea_compile): Free str_tree_storage chunks
	instead of free_bin_tree (dfa->str_tree).
	(parse): Call re_dfa_add_tree_node instead of re_dfa_add_node
	followed by create_tree.  Add dfa argument to remaining
	create_tree calls.  Remove new_idx variable.  Remove calls
	to free_bin_tree.
	(parse_reg_exp, parse_branch, parse_expression, parse_sub_exp,
	parse_dup_op, parse_bracket_exp, build_charclass_op): Likewise.
	(duplicate_tree): Remove calls to free_bin_tree, add dfa
	argument to create_tree.
	* posix/regex_internal.h (BIN_TREE_STORAGE_SIZE): Define.
	(bin_tree_storage_t): New type.
	(re_dfa_t): Add str_tree_storage and str_tree_storage_idx
	fields.
	* posix/Makefile (tests): Add bug-regex21.
	(generated): Add bug-regex21-mem, bug-regex21.mtrace,
	tst-rxspencer-mem and tst-rxspencer.mtrace.
	(tests): Depend on $(objpfx)bug-regex21-mem
	and $(objpfx)tst-rxspencer-mem.
	(bug-regex21-ENV, tst-rxspencer-ENV): Set.
	($(objpfx)bug-regex21-mem, $(objpfx)tst-rxspencer-mem): New.
	* posix/tst-rxspencer.c (main): Add call to mtrace.
	Free line at the end.
	* posix/bug-regex21.c: New test.

--- libc/posix/regcomp.c.jj	2003-11-19 10:24:36.000000000 +0100
+++ libc/posix/regcomp.c	2003-11-19 15:51:59.000000000 +0100
@@ -126,9 +126,13 @@ static bin_tree_t *build_charclass_op (r
 				       const unsigned char *class_name,
 				       const unsigned char *extra, int not,
 				       reg_errcode_t *err);
-static void free_bin_tree (bin_tree_t *tree);
-static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
+static bin_tree_t *create_tree (re_dfa_t *dfa,
+				bin_tree_t *left, bin_tree_t *right,
 				re_token_type_t type, int index);
+static bin_tree_t *re_dfa_add_tree_node (re_dfa_t *dfa,
+					 bin_tree_t *left, bin_tree_t *right,
+					 re_token_t)
+  __attribute ((noinline));
 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
 \f
 /* This table gives an error message for each of the error codes listed
@@ -562,17 +566,18 @@ free_dfa_content (re_dfa_t *dfa)
 
   re_free (dfa->subexps);
 
-  for (i = 0; i < dfa->nodes_len; ++i)
-    {
-      re_token_t *node = dfa->nodes + i;
+  if (dfa->nodes)
+    for (i = 0; i < dfa->nodes_len; ++i)
+      {
+	re_token_t *node = dfa->nodes + i;
 #ifdef RE_ENABLE_I18N
-      if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
-	free_charset (node->opr.mbcset);
-      else
+	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+	  free_charset (node->opr.mbcset);
+	else
 #endif /* RE_ENABLE_I18N */
-	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
-	  re_free (node->opr.sbcset);
-    }
+	  if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+	    re_free (node->opr.sbcset);
+      }
   re_free (dfa->nexts);
   for (i = 0; i < dfa->nodes_len; ++i)
     {
@@ -588,20 +593,19 @@ free_dfa_content (re_dfa_t *dfa)
   re_free (dfa->inveclosures);
   re_free (dfa->nodes);
 
-  for (i = 0; i <= dfa->state_hash_mask; ++i)
-    {
-      struct re_state_table_entry *entry = dfa->state_table + i;
-      for (j = 0; j < entry->num; ++j)
-	{
-	  re_dfastate_t *state = entry->array[j];
-	  free_state (state);
-	}
-      re_free (entry->array);
-    }
+  if (dfa->state_table)
+    for (i = 0; i <= dfa->state_hash_mask; ++i)
+      {
+	struct re_state_table_entry *entry = dfa->state_table + i;
+	for (j = 0; j < entry->num; ++j)
+	  {
+	    re_dfastate_t *state = entry->array[j];
+	    free_state (state);
+	  }
+        re_free (entry->array);
+      }
   re_free (dfa->state_table);
-
-  if (dfa->word_char != NULL)
-    re_free (dfa->word_char);
+  re_free (dfa->word_char);
 #ifdef DEBUG
   re_free (dfa->re_str);
 #endif
@@ -738,7 +742,7 @@ re_compile_internal (preg, pattern, leng
   err = init_dfa (dfa, length);
   if (BE (err != REG_NOERROR, 0))
     {
-      re_free (dfa);
+      free_dfa_content (dfa);
       preg->buffer = NULL;
       preg->allocated = 0;
       return err;
@@ -752,7 +756,10 @@ re_compile_internal (preg, pattern, leng
 			     syntax & RE_ICASE, dfa);
   if (BE (err != REG_NOERROR, 0))
     {
-      re_free (dfa);
+    re_compile_internal_free_return:
+      free_workarea_compile (preg);
+      re_string_destruct (&regexp);
+      free_dfa_content (dfa);
       preg->buffer = NULL;
       preg->allocated = 0;
       return err;
@@ -785,7 +792,6 @@ re_compile_internal (preg, pattern, leng
 
   if (BE (err != REG_NOERROR, 0))
     {
-    re_compile_internal_free_return:
       free_dfa_content (dfa);
       preg->buffer = NULL;
       preg->allocated = 0;
@@ -806,6 +812,9 @@ init_dfa (dfa, pat_len)
 
   memset (dfa, '\0', sizeof (re_dfa_t));
 
+  /* Force allocation of str_tree_storage the first time.  */
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
+
   dfa->nodes_alloc = pat_len + 1;
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
@@ -834,14 +843,7 @@ init_dfa (dfa, pat_len)
 
   if (BE (dfa->nodes == NULL || dfa->state_table == NULL
 	  || dfa->subexps == NULL, 0))
-    {
-      /* We don't bother to free anything which was allocated.  Very
-	 soon the process will go down anyway.  */
-      dfa->subexps = NULL;
-      dfa->state_table = NULL;
-      dfa->nodes = NULL;
-      return REG_ESPACE;
-    }
+    return REG_ESPACE;
   return REG_NOERROR;
 }
 
@@ -871,7 +873,14 @@ free_workarea_compile (preg)
      regex_t *preg;
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  free_bin_tree (dfa->str_tree);
+  bin_tree_storage_t *storage, *next;
+  for (storage = dfa->str_tree_storage; storage; storage = next)
+    {
+      next = storage->next;
+      re_free (storage);
+    }
+  dfa->str_tree_storage = NULL;
+  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
   dfa->str_tree = NULL;
   re_free (dfa->org_indices);
   dfa->org_indices = NULL;
@@ -1918,18 +1927,16 @@ parse (regexp, preg, syntax, err)
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   bin_tree_t *tree, *eor, *root;
   re_token_t current_token;
-  int new_idx;
   current_token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE);
   tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
   if (BE (*err != REG_NOERROR && tree == NULL, 0))
     return NULL;
-  new_idx = re_dfa_add_node (dfa, current_token, 0);
-  eor = create_tree (NULL, NULL, 0, new_idx);
+  eor = re_dfa_add_tree_node (dfa, NULL, NULL, current_token);
   if (tree != NULL)
-    root = create_tree (tree, eor, CONCAT, 0);
+    root = create_tree (dfa, tree, eor, CONCAT, 0);
   else
     root = eor;
-  if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
+  if (BE (eor == NULL || root == NULL, 0))
     {
       *err = REG_ESPACE;
       return NULL;
@@ -1957,7 +1964,6 @@ parse_reg_exp (regexp, preg, token, synt
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   bin_tree_t *tree, *branch = NULL;
-  int new_idx;
   tree = parse_branch (regexp, preg, token, syntax, nest, err);
   if (BE (*err != REG_NOERROR && tree == NULL, 0))
     return NULL;
@@ -1965,22 +1971,18 @@ parse_reg_exp (regexp, preg, token, synt
   while (token->type == OP_ALT)
     {
       re_token_t alt_token = *token;
-      new_idx = re_dfa_add_node (dfa, alt_token, 0);
       *token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE);
       if (token->type != OP_ALT && token->type != END_OF_RE
 	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
 	{
 	  branch = parse_branch (regexp, preg, token, syntax, nest, err);
 	  if (BE (*err != REG_NOERROR && branch == NULL, 0))
-	    {
-	      free_bin_tree (tree);
-	      return NULL;
-	    }
+	    return NULL;
 	}
       else
 	branch = NULL;
-      tree = create_tree (tree, branch, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, tree, branch, alt_token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2009,6 +2011,7 @@ parse_branch (regexp, preg, token, synta
      reg_errcode_t *err;
 {
   bin_tree_t *tree, *exp;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   tree = parse_expression (regexp, preg, token, syntax, nest, err);
   if (BE (*err != REG_NOERROR && tree == NULL, 0))
     return NULL;
@@ -2019,12 +2022,11 @@ parse_branch (regexp, preg, token, synta
       exp = parse_expression (regexp, preg, token, syntax, nest, err);
       if (BE (*err != REG_NOERROR && exp == NULL, 0))
 	{
-	  free_bin_tree (tree);
 	  return NULL;
 	}
       if (tree != NULL && exp != NULL)
 	{
-	  tree = create_tree (tree, exp, CONCAT, 0);
+	  tree = create_tree (dfa, tree, exp, CONCAT, 0);
 	  if (tree == NULL)
 	    {
 	      *err = REG_ESPACE;
@@ -2055,13 +2057,11 @@ parse_expression (regexp, preg, token, s
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   bin_tree_t *tree;
-  int new_idx;
   switch (token->type)
     {
     case CHARACTER:
-      new_idx = re_dfa_add_node (dfa, *token, 0);
-      tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2074,10 +2074,9 @@ parse_expression (regexp, preg, token, s
 	    {
 	      bin_tree_t *mbc_remain;
 	      *token = fetch_token (regexp, syntax);
-	      new_idx = re_dfa_add_node (dfa, *token, 0);
-	      mbc_remain = create_tree (NULL, NULL, 0, new_idx);
-	      tree = create_tree (tree, mbc_remain, CONCAT, 0);
-	      if (BE (new_idx == -1 || mbc_remain == NULL || tree == NULL, 0))
+	      mbc_remain = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT, 0);
+	      if (BE (mbc_remain == NULL || tree == NULL, 0))
 		{
 		  *err = REG_ESPACE;
 		  return NULL;
@@ -2104,9 +2103,8 @@ parse_expression (regexp, preg, token, s
 	  return NULL;
 	}
       dfa->used_bkref_map |= 1 << (token->opr.idx - 1);
-      new_idx = re_dfa_add_node (dfa, *token, 0);
-      tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2148,9 +2146,8 @@ parse_expression (regexp, preg, token, s
 
       /* Then we can these characters as normal characters.  */
       token->type = CHARACTER;
-      new_idx = re_dfa_add_node (dfa, *token, 0);
-      tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2166,19 +2163,13 @@ parse_expression (regexp, preg, token, s
       if (token->opr.ctx_type == WORD_DELIM)
 	{
 	  bin_tree_t *tree_first, *tree_last;
-	  int idx_first, idx_last;
 	  token->opr.ctx_type = WORD_FIRST;
-	  idx_first = re_dfa_add_node (dfa, *token, 0);
-	  tree_first = create_tree (NULL, NULL, 0, idx_first);
+	  tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
 	  token->opr.ctx_type = WORD_LAST;
-	  idx_last = re_dfa_add_node (dfa, *token, 0);
-	  tree_last = create_tree (NULL, NULL, 0, idx_last);
+	  tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
 	  token->type = OP_ALT;
-	  new_idx = re_dfa_add_node (dfa, *token, 0);
-	  tree = create_tree (tree_first, tree_last, 0, new_idx);
-	  if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
-		  || tree_first == NULL || tree_last == NULL
-		  || tree == NULL, 0))
+	  tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, *token);
+	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
 	    {
 	      *err = REG_ESPACE;
 	      return NULL;
@@ -2186,9 +2177,8 @@ parse_expression (regexp, preg, token, s
 	}
       else
 	{
-	  new_idx = re_dfa_add_node (dfa, *token, 0);
-	  tree = create_tree (NULL, NULL, 0, new_idx);
-	  if (BE (new_idx == -1 || tree == NULL, 0))
+	  tree = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+	  if (BE (tree == NULL, 0))
 	    {
 	      *err = REG_ESPACE;
 	      return NULL;
@@ -2201,9 +2191,8 @@ parse_expression (regexp, preg, token, s
       *token = fetch_token (regexp, syntax);
       return tree;
     case OP_PERIOD:
-      new_idx = re_dfa_add_node (dfa, *token, 0);
-      tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2285,7 +2274,6 @@ parse_sub_exp (regexp, preg, token, synt
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   bin_tree_t *tree, *left_par, *right_par;
   size_t cur_nsub;
-  int new_idx;
   cur_nsub = preg->re_nsub++;
   if (dfa->subexps_alloc < preg->re_nsub)
     {
@@ -2303,14 +2291,13 @@ parse_sub_exp (regexp, preg, token, synt
   dfa->subexps[cur_nsub].start = dfa->nodes_len;
   dfa->subexps[cur_nsub].end = -1;
 
-  new_idx = re_dfa_add_node (dfa, *token, 0);
-  left_par = create_tree (NULL, NULL, 0, new_idx);
-  if (BE (new_idx == -1 || left_par == NULL, 0))
+  left_par = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
+  if (BE (left_par == NULL, 0))
     {
       *err = REG_ESPACE;
       return NULL;
     }
-  dfa->nodes[new_idx].opr.idx = cur_nsub;
+  dfa->nodes[left_par->node_idx].opr.idx = cur_nsub;
   *token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE);
 
   /* The subexpression may be a null string.  */
@@ -2324,22 +2311,20 @@ parse_sub_exp (regexp, preg, token, synt
     }
   if (BE (token->type != OP_CLOSE_SUBEXP, 0))
     {
-      free_bin_tree (tree);
       *err = REG_EPAREN;
       return NULL;
     }
-  new_idx = re_dfa_add_node (dfa, *token, 0);
+  right_par = re_dfa_add_tree_node (dfa, NULL, NULL, *token);
   dfa->subexps[cur_nsub].end = dfa->nodes_len;
-  right_par = create_tree (NULL, NULL, 0, new_idx);
   tree = ((tree == NULL) ? right_par
-	  : create_tree (tree, right_par, CONCAT, 0));
-  tree = create_tree (left_par, tree, CONCAT, 0);
-  if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
+	  : create_tree (dfa, tree, right_par, CONCAT, 0));
+  tree = create_tree (dfa, left_par, tree, CONCAT, 0);
+  if (BE (right_par == NULL || tree == NULL, 0))
     {
       *err = REG_ESPACE;
       return NULL;
     }
-  dfa->nodes[new_idx].opr.idx = cur_nsub;
+  dfa->nodes[right_par->node_idx].opr.idx = cur_nsub;
 
   return tree;
 }
@@ -2357,7 +2342,7 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 {
   re_token_t dup_token;
   bin_tree_t *tree = dup_elem, *work_tree;
-  int new_idx, start_idx = re_string_cur_idx (regexp);
+  int start_idx = re_string_cur_idx (regexp);
   re_token_t start_token = *token;
   if (token->type == OP_OPEN_DUP_NUM)
     {
@@ -2394,7 +2379,6 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	{
 	  /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
 	  *token = fetch_token (regexp, syntax);
-	  free_bin_tree (dup_elem);
 	  return NULL;
 	}
 
@@ -2404,7 +2388,7 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	if (i != 0)
 	  {
 	    work_tree = duplicate_tree (elem, dfa);
-	    tree = create_tree (tree, work_tree, CONCAT, 0);
+	    tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
 	    if (BE (work_tree == NULL || tree == NULL, 0))
 	      goto parse_dup_op_espace;
 	  }
@@ -2416,18 +2400,15 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	  if (start > 0)
 	    {
 	      elem = duplicate_tree (elem, dfa);
-	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
-	      work_tree = create_tree (elem, NULL, 0, new_idx);
-	      tree = create_tree (tree, work_tree, CONCAT, 0);
-	      if (BE (elem == NULL || new_idx == -1 || work_tree == NULL
-		      || tree == NULL, 0))
+	      work_tree = re_dfa_add_tree_node (dfa, elem, NULL, dup_token);
+	      tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
+	      if (BE (elem == NULL || work_tree == NULL || tree == NULL, 0))
 		goto parse_dup_op_espace;
 	    }
 	  else
 	    {
-	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
-	      tree = create_tree (elem, NULL, 0, new_idx);
-	      if (BE (new_idx == -1 || tree == NULL, 0))
+	      tree = re_dfa_add_tree_node (dfa, elem, NULL, dup_token);
+	      if (BE (tree == NULL, 0))
 		goto parse_dup_op_espace;
 	    }
 	}
@@ -2444,23 +2425,21 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	  if (start > 0)
 	    {
 	      elem = duplicate_tree (elem, dfa);
-	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
-	      elem = create_tree (elem, NULL, 0, new_idx);
-	      tree = create_tree (tree, elem, CONCAT, 0);
-	      if (BE (elem == NULL || new_idx == -1 || tree == NULL, 0))
+	      elem = re_dfa_add_tree_node (dfa, elem, NULL, dup_token);
+	      tree = create_tree (dfa, tree, elem, CONCAT, 0);
+	      if (BE (elem == NULL || tree == NULL, 0))
 		goto parse_dup_op_espace;
 	    }
 	  else
 	    {
-	      new_idx = re_dfa_add_node (dfa, dup_token, 0);
-	      tree = elem = create_tree (elem, NULL, 0, new_idx);
-	      if (BE (new_idx == -1 || tree == NULL, 0))
+	      tree = elem = re_dfa_add_tree_node (dfa, elem, NULL, dup_token);
+	      if (BE (tree == NULL, 0))
 		goto parse_dup_op_espace;
 	    }
 	  for (i = 1; i < end - start; ++i)
 	    {
 	      work_tree = duplicate_tree (elem, dfa);
-	      tree = create_tree (tree, work_tree, CONCAT, 0);
+	      tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
 	      if (BE (work_tree == NULL || tree == NULL, 0))
 		{
 		  *err = REG_ESPACE;
@@ -2471,9 +2450,8 @@ parse_dup_op (dup_elem, regexp, dfa, tok
     }
   else
     {
-      new_idx = re_dfa_add_node (dfa, *token, 0);
-      tree = create_tree (tree, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || tree == NULL, 0))
+      tree = re_dfa_add_tree_node (dfa, tree, NULL, *token);
+      if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
@@ -2483,7 +2461,6 @@ parse_dup_op (dup_elem, regexp, dfa, tok
   return tree;
 
  parse_dup_op_espace:
-  free_bin_tree (tree);
   *err = REG_ESPACE;
   return NULL;
 
@@ -2938,7 +2915,7 @@ parse_bracket_exp (regexp, dfa, token, s
   int non_match = 0;
 #endif /* not RE_ENABLE_I18N */
   bin_tree_t *work_tree;
-  int token_len, new_idx;
+  int token_len;
 #ifdef _LIBC
   collseqmb = (const unsigned char *)
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
@@ -3155,9 +3132,8 @@ parse_bracket_exp (regexp, dfa, token, s
   /* Build a tree for simple bracket.  */
   br_token.type = SIMPLE_BRACKET;
   br_token.opr.sbcset = sbcset;
-  new_idx = re_dfa_add_node (dfa, br_token, 0);
-  work_tree = create_tree (NULL, NULL, 0, new_idx);
-  if (BE (new_idx == -1 || work_tree == NULL, 0))
+  work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, br_token);
+  if (BE (work_tree == NULL, 0))
     goto parse_bracket_exp_espace;
 
 #ifdef RE_ENABLE_I18N
@@ -3179,21 +3155,19 @@ parse_bracket_exp (regexp, dfa, token, s
       if (sbc_idx == BITSET_UINTS)
 	{
 	  re_free (sbcset);
-	  dfa->nodes[new_idx].type = COMPLEX_BRACKET;
-	  dfa->nodes[new_idx].opr.mbcset = mbcset;
+	  dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET;
+	  dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset;
 	  return work_tree;
 	}
       br_token.type = COMPLEX_BRACKET;
       br_token.opr.mbcset = mbcset;
-      new_idx = re_dfa_add_node (dfa, br_token, 0);
-      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, br_token);
+      if (BE (mbc_tree == NULL, 0))
 	goto parse_bracket_exp_espace;
       /* Then join them by ALT node.  */
       alt_token.type = OP_ALT;
-      new_idx = re_dfa_add_node (dfa, alt_token, 0);
-      work_tree = create_tree (work_tree, mbc_tree, 0, new_idx);
-      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
+      work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, alt_token);
+      if (BE (mbc_tree != NULL, 1))
 	return work_tree;
     }
   else
@@ -3497,7 +3471,6 @@ build_charclass_op (dfa, trans, class_na
   reg_errcode_t ret;
   re_token_t br_token;
   bin_tree_t *tree;
-  int new_idx;
 
   sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
 #ifdef RE_ENABLE_I18N
@@ -3563,9 +3536,8 @@ build_charclass_op (dfa, trans, class_na
   /* Build a tree for simple bracket.  */
   br_token.type = SIMPLE_BRACKET;
   br_token.opr.sbcset = sbcset;
-  new_idx = re_dfa_add_node (dfa, br_token, 0);
-  tree = create_tree (NULL, NULL, 0, new_idx);
-  if (BE (new_idx == -1 || tree == NULL, 0))
+  tree = re_dfa_add_tree_node (dfa, NULL, NULL, br_token);
+  if (BE (tree == NULL, 0))
     goto build_word_op_espace;
 
 #ifdef RE_ENABLE_I18N
@@ -3577,15 +3549,13 @@ build_charclass_op (dfa, trans, class_na
       br_token.type = COMPLEX_BRACKET;
       br_token.opr.mbcset = mbcset;
       dfa->has_mb_node = 1;
-      new_idx = re_dfa_add_node (dfa, br_token, 0);
-      mbc_tree = create_tree (NULL, NULL, 0, new_idx);
-      if (BE (new_idx == -1 || mbc_tree == NULL, 0))
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, br_token);
+      if (BE (mbc_tree == NULL, 0))
 	goto build_word_op_espace;
       /* Then join them by ALT node.  */
       alt_token.type = OP_ALT;
-      new_idx = re_dfa_add_node (dfa, alt_token, 0);
-      tree = create_tree (tree, mbc_tree, 0, new_idx);
-      if (BE (new_idx != -1 && mbc_tree != NULL, 1))
+      tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, alt_token);
+      if (BE (mbc_tree != NULL, 1))
 	return tree;
     }
   else
@@ -3652,24 +3622,29 @@ free_charset (re_charset_t *cset)
 \f
 /* Functions for binary tree operation.  */
 
-/* Create a node of tree.
-   Note: This function automatically free left and right if malloc fails.  */
+/* Create a tree node.  */
 
 static bin_tree_t *
-create_tree (left, right, type, index)
+create_tree (dfa, left, right, type, index)
+     re_dfa_t *dfa;
      bin_tree_t *left;
      bin_tree_t *right;
      re_token_type_t type;
      int index;
 {
   bin_tree_t *tree;
-  tree = re_malloc (bin_tree_t, 1);
-  if (BE (tree == NULL, 0))
+  if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
     {
-      free_bin_tree (left);
-      free_bin_tree (right);
-      return NULL;
+      bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
+
+      if (storage == NULL)
+	return NULL;
+      storage->next = dfa->str_tree_storage;
+      dfa->str_tree_storage = storage;
+      dfa->str_tree_storage_idx = 0;
     }
+  tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
+
   tree->parent = NULL;
   tree->left = left;
   tree->right = right;
@@ -3686,20 +3661,24 @@ create_tree (left, right, type, index)
   return tree;
 }
 
-/* Free the sub tree pointed by TREE.  */
+/* Create both a DFA node and a tree for it.  */
 
-static void
-free_bin_tree (tree)
-     bin_tree_t *tree;
-{
-  if (tree == NULL)
-    return;
-  /*re_node_set_free (&tree->eclosure);*/
-  free_bin_tree (tree->left);
-  free_bin_tree (tree->right);
-  re_free (tree);
+static bin_tree_t *
+re_dfa_add_tree_node (dfa, left, right, token)
+     re_dfa_t *dfa;
+     bin_tree_t *left;
+     bin_tree_t *right;
+     re_token_t token;
+{     
+  int new_idx = re_dfa_add_node (dfa, token, 0);
+
+  if (new_idx == -1)
+    return NULL;
+
+  return create_tree (dfa, left, right, 0, new_idx);
 }
 
+
 /* Duplicate the node SRC, and return new node.  */
 
 static bin_tree_t *
@@ -3723,10 +3702,7 @@ duplicate_tree (src, dfa)
     {
       right = duplicate_tree (src->right, dfa);
       if (right == NULL)
-	{
-	  free_bin_tree (left);
-	  return NULL;
-	}
+	return NULL;
     }
 
   /* At last, duplicate itself.  */
@@ -3735,20 +3711,11 @@ duplicate_tree (src, dfa)
       new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
       dfa->nodes[new_node_idx].duplicated = 1;
       if (BE (new_node_idx == -1, 0))
-	{
-	  free_bin_tree (left);
-	  free_bin_tree (right);
-	  return NULL;
-	}
+	return NULL;
     }
   else
     new_node_idx = src->type;
 
-  new_tree = create_tree (left, right, src->type, new_node_idx);
-  if (BE (new_tree == NULL, 0))
-    {
-      free_bin_tree (left);
-      free_bin_tree (right);
-    }
+  new_tree = create_tree (dfa, left, right, src->type, new_node_idx);
   return new_tree;
 }
--- libc/posix/tst-rxspencer.c.jj	2003-11-15 10:02:26.000000000 +0100
+++ libc/posix/tst-rxspencer.c	2003-11-19 15:33:17.000000000 +0100
@@ -384,6 +384,8 @@ main (int argc, char **argv)
       {NULL,	0,		NULL,		0 }
     };
 
+  mtrace ();
+
   while (getopt_long (argc, argv, "", options, NULL) >= 0);
 
   if (optind + 1 != argc)
@@ -510,6 +512,7 @@ main (int argc, char **argv)
 	}
     }
 
+  free (line);
   fclose (f);
   return ret;
 }
--- libc/posix/Makefile.jj	2003-11-13 22:04:19.000000000 +0100
+++ libc/posix/Makefile	2003-11-19 15:47:10.000000000 +0100
@@ -78,7 +78,7 @@ tests		:= tstgetopt testfnm runtests run
 		   bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \
 		   bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
 		   bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
-		   tst-nice tst-nanosleep transbug tst-rxspencer
+		   bug-regex21 tst-nice tst-nanosleep transbug tst-rxspencer
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
 tests           += wordexp-test tst-exec tst-spawn
@@ -93,7 +93,8 @@ before-compile	:= testcases.h ptestcases
 generated := $(addprefix wordexp-test-result, 1 2 3 4 5 6 7 8 9 10) \
 	     annexc annexc.out wordexp-tst.out bug-regex2-mem \
 	     bug-regex2.mtrace bug-regex14-mem bug-regex14.mtrace \
-	     tst-getconf.out
+	     bug-regex21-mem bug-regex21.mtrace \
+	     tst-rxspencer-mem tst-rxspencer.mtrace tst-getconf.out
 
 include ../Rules
 
@@ -178,7 +179,9 @@ endif
 # XXX Please note that for now we ignore the result of this test.
 tests: $(objpfx)annexc.out
 ifeq (no,$(cross-compiling))
-tests:$(objpfx)bug-regex2-mem $(objpfx)bug-regex14-mem $(objpfx)tst-getconf.out
+tests: $(objpfx)bug-regex2-mem $(objpfx)bug-regex14-mem \
+  $(objpfx)bug-regex21-mem $(objpfx)tst-rxspencer-mem \
+  $(objpfx)tst-getconf.out
 endif
 
 $(objpfx)annexc.out: $(objpfx)annexc
@@ -199,6 +202,16 @@ bug-regex14-ENV = MALLOC_TRACE=$(objpfx)
 $(objpfx)bug-regex14-mem: $(objpfx)bug-regex14.out
 	$(common-objpfx)malloc/mtrace $(objpfx)bug-regex14.mtrace > $@
 
+bug-regex21-ENV = MALLOC_TRACE=$(objpfx)bug-regex21.mtrace
+
+$(objpfx)bug-regex21-mem: $(objpfx)bug-regex21.out
+	$(common-objpfx)malloc/mtrace $(objpfx)bug-regex21.mtrace > $@
+
+tst-rxspencer-ENV = MALLOC_TRACE=$(objpfx)tst-rxspencer.mtrace
+
+$(objpfx)tst-rxspencer-mem: $(objpfx)tst-rxspencer.out
+	$(common-objpfx)malloc/mtrace $(objpfx)tst-rxspencer.mtrace > $@
+
 $(objpfx)tst-getconf.out: tst-getconf.sh $(objpfx)getconf
 	$(SHELL) -e $< $(common-objpfx) $(elf-objpfx) $(rtld-installed-name)
 
--- libc/posix/regex_internal.h.jj	2003-11-19 10:24:36.000000000 +0100
+++ libc/posix/regex_internal.h	2003-11-19 14:59:21.000000000 +0100
@@ -412,6 +412,15 @@ struct bin_tree_t
 };
 typedef struct bin_tree_t bin_tree_t;
 
+#define BIN_TREE_STORAGE_SIZE \
+  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
+
+struct bin_tree_storage_t
+{
+  struct bin_tree_storage_t *next;
+  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
+};
+typedef struct bin_tree_storage_t bin_tree_storage_t;
 
 #define CONTEXT_WORD 1
 #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
@@ -578,7 +587,6 @@ struct re_dfa_t
   re_token_t *nodes;
   int nodes_alloc;
   int nodes_len;
-  bin_tree_t *str_tree;
   int *nexts;
   int *org_indices;
   re_node_set *edests;
@@ -589,6 +597,9 @@ struct re_dfa_t
   re_dfastate_t *init_state_word;
   re_dfastate_t *init_state_nl;
   re_dfastate_t *init_state_begbuf;
+  bin_tree_t *str_tree;
+  bin_tree_storage_t *str_tree_storage;
+  int str_tree_storage_idx;
 
   /* number of subexpressions `re_nsub' is in regex_t.  */
   int subexps_alloc;
@@ -598,9 +609,6 @@ struct re_dfa_t
   int nbackref; /* The number of backreference in this dfa.  */
   /* Bitmap expressing which backreference is used.  */
   unsigned int used_bkref_map;
-#ifdef DEBUG
-  char* re_str;
-#endif
   unsigned int has_plural_match : 1;
   /* If this dfa has "multibyte node", which is a backreference or
      a node which can accept multibyte character or multi character
@@ -609,6 +617,9 @@ struct re_dfa_t
   unsigned int is_utf8 : 1;
   unsigned int map_notascii : 1;
   int mb_cur_max;
+#ifdef DEBUG
+  char* re_str;
+#endif
 };
 
 #ifndef RE_NO_INTERNAL_PROTOTYPES
--- libc/posix/bug-regex21.c.jj	2003-11-19 15:37:21.000000000 +0100
+++ libc/posix/bug-regex21.c	2003-11-19 15:51:52.000000000 +0100
@@ -0,0 +1,45 @@
+/* Test for memory leaks in regcomp.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+
+int main (void)
+{
+  regex_t re;
+  int i;
+  int ret = 0;
+
+  mtrace ();
+
+  for (i = 0; i < 32; ++i)
+    {
+      if (regcomp (&re, "X-.+:.+Y=\".*\\.(A|B|C|D|E|F|G|H|I",
+		   REG_EXTENDED | REG_ICASE) == 0)
+	{
+	  puts ("regcomp unexpectedly succeeded");
+	  ret = 1;
+	}
+      else
+	regfree (&re);
+    }
+  return ret;
+}

	Jakub

             reply	other threads:[~2003-11-19 15:25 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-11-19 16:56 Jakub Jelinek [this message]
2003-11-19 19:29 ` Ulrich Drepper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20031119132031.GL12344@sunsite.ms.mff.cuni.cz \
    --to=jakub@redhat.com \
    --cc=drepper@redhat.com \
    --cc=libc-hacker@sources.redhat.com \
    --cc=roland@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).