From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 25090 invoked by alias); 30 Oct 2002 15:00:00 -0000 Mailing-List: contact libc-hacker-help@sources.redhat.com; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-hacker-owner@sources.redhat.com Received: (qmail 25029 invoked from network); 30 Oct 2002 14:59:59 -0000 Received: from unknown (HELO sunsite.mff.cuni.cz) (195.113.19.66) by sources.redhat.com with SMTP; 30 Oct 2002 14:59:59 -0000 Received: (from jakub@localhost) by sunsite.mff.cuni.cz (8.11.6/8.11.6) id g9UExWY05573; Wed, 30 Oct 2002 15:59:32 +0100 Date: Wed, 30 Oct 2002 07:56:00 -0000 From: Jakub Jelinek To: Isamu Hasegawa Cc: Glibc hackers Subject: Regex memory management Message-ID: <20021030155931.W3451@sunsite.ms.mff.cuni.cz> Reply-To: Jakub Jelinek Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.2.5.1i X-SW-Source: 2002-10/txt/msg00109.txt.bz2 Hi! I wonder whether it wouldn't be a win to switch to using obstacks for regex (one created by regcomp and destroyed in regfree, one created at start of regexec and destroyed when it is done searching). Looking at typical regex malloc/free patterns (just read bug-regex2.mtrace or bug-regex14.mtrace), it is something like ~ 40 mallocs followed by ~15 free calls (regcomp), then a couple of (~ 12 mallocs followed by ~ 7 frees of these; during first regexec) and then always a one or more mallocs followed by one or more frees for each remaining regexec. >From this, I think it could be a win to have 2 different obstacks, one permanent, inited at regcomp start, freed in regfree, the other would be temporary, inited at regcomp start and freed at regcomp end and inited at regexec start, freed at regexec end. The obstack_chunk_alloc routine would have to either longjmp to regcomp/regexec cleanup handler, or when glibc starts to use try/finally it could throw exception. The temporary allocation obstacks might even use alloca for the initial (say 2K) buffer - the only user of big malloc chunk seems to be build_trtable. Attached is just a patch which changes big temporary allocations in build_trtable from set of 5 malloc calls to 2 alloca calls and adds some ENOMEM handling. group_nodes_into_DFAstates can still leak if malloc returns NULL and I haven't checked other routines. 2002-10-30 Jakub Jelinek * posix/regexec.c (build_trtable): Alloca or malloc dests_node and dests_ch arrays together. Alloca or malloc dest_states, dest_states_word and dest_states_nl arrays together. Free memory on error exit. --- libc/posix/regexec.c.jj 2002-10-21 19:14:52.000000000 +0200 +++ libc/posix/regexec.c 2002-10-30 15:41:50.000000000 +0100 @@ -2469,8 +2469,10 @@ build_trtable (preg, state, fl_search) reg_errcode_t err; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; int i, j, k, ch; + int dests_node_malloced = 0, dest_states_malloced = 0; int ndests; /* Number of the destination states from `state'. */ - re_dfastate_t **trtable, **dest_states, **dest_states_word, **dest_states_nl; + re_dfastate_t **trtable; + re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; bitset *dests_ch; bitset acceptable; @@ -2479,34 +2481,76 @@ build_trtable (preg, state, fl_search) from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ - dests_node = re_malloc (re_node_set, SBC_MAX); - dests_ch = re_malloc (bitset, SBC_MAX); +#ifdef _LIBC + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) + dests_node = (re_node_set *) + alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + else +#endif + { + dests_node = (re_node_set *) + malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + if (BE (dests_node == NULL, 0)) + return NULL; + dests_node_malloced = 1; + } + dests_ch = (bitset *) (dests_node + SBC_MAX); /* Initialize transiton table. */ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (dests_node == NULL || dests_ch == NULL || trtable == NULL, 0)) - return NULL; + if (BE (trtable == NULL, 0)) + { + if (dests_node_malloced) + free (dests_node); + return NULL; + } /* At first, group all nodes belonging to `state' into several destinations. */ ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); if (BE (ndests <= 0, 0)) { - re_free (dests_node); - re_free (dests_ch); + if (dests_node_malloced) + free (dests_node); /* Return NULL in case of an error, trtable otherwise. */ - return (ndests < 0) ? NULL : trtable; + if (ndests == 0) + return trtable; + free (trtable); + return NULL; } - dest_states = re_malloc (re_dfastate_t *, ndests); - dest_states_word = re_malloc (re_dfastate_t *, ndests); - dest_states_nl = re_malloc (re_dfastate_t *, ndests); - bitset_empty (acceptable); - err = re_node_set_alloc (&follows, ndests + 1); - if (BE (dest_states == NULL || dest_states_word == NULL - || dest_states_nl == NULL || err != REG_NOERROR, 0)) - return NULL; + if (BE (err != REG_NOERROR, 0)) + goto out_free; + +#ifdef _LIBC + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + + ndests * 3 * sizeof (re_dfastate_t *))) + dest_states = (re_dfastate_t **) + alloca (ndests * 3 * sizeof (re_dfastate_t *)); + else +#endif + { + dest_states = (re_dfastate_t **) + malloc (ndests * 3 * sizeof (re_dfastate_t *)); + if (BE (dest_states == NULL, 0)) + { +out_free: + if (dest_states_malloced) + free (dest_states); + re_node_set_free (&follows); + for (i = 0; i < ndests; ++i) + re_node_set_free (dests_node + i); + free (trtable); + if (dests_node_malloced) + free (dests_node); + return NULL; + } + dest_states_malloced = 1; + } + dest_states_word = dest_states + ndests; + dest_states_nl = dest_states_word + ndests; + bitset_empty (acceptable); /* Then build the states for all destinations. */ for (i = 0; i < ndests; ++i) @@ -2521,7 +2565,7 @@ build_trtable (preg, state, fl_search) { err = re_node_set_merge (&follows, dfa->eclosures + next_node); if (BE (err != REG_NOERROR, 0)) - return NULL; + goto out_free; } } /* If search flag is set, merge the initial state. */ @@ -2541,12 +2585,12 @@ build_trtable (preg, state, fl_search) err = re_node_set_merge (&follows, dfa->init_state->entrance_nodes); if (BE (err != REG_NOERROR, 0)) - return NULL; + goto out_free; } } dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) - return NULL; + goto out_free; /* If the new state has context constraint, build appropriate states for these contexts. */ if (dest_states[i]->has_constraint) @@ -2554,11 +2598,11 @@ build_trtable (preg, state, fl_search) dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_WORD); if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) - return NULL; + goto out_free; dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_NEWLINE); if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) - return NULL; + goto out_free; } else { @@ -2615,16 +2659,15 @@ build_trtable (preg, state, fl_search) } } - re_free (dest_states_nl); - re_free (dest_states_word); - re_free (dest_states); + if (dest_states_malloced) + free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); - re_free (dests_ch); - re_free (dests_node); + if (dests_node_malloced) + free (dests_node); return trtable; } Jakub