From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 19358 invoked by alias); 21 Jul 2010 11:58:53 -0000 Received: (qmail 19344 invoked by uid 9449); 21 Jul 2010 11:58:53 -0000 Date: Wed, 21 Jul 2010 11:58:00 -0000 Message-ID: <20100721115853.19342.qmail@sourceware.org> From: thornber@sourceware.org To: lvm-devel@redhat.com, lvm2-cvs@sourceware.org Subject: LVM2/libdm/regex matcher.c parse_rx.c parse_rx.h Mailing-List: contact lvm2-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: lvm2-cvs-owner@sourceware.org X-SW-Source: 2010-07/txt/msg00076.txt.bz2 CVSROOT: /cvs/lvm2 Module name: LVM2 Changes by: thornber@sourceware.org 2010-07-21 11:58:50 Modified files: libdm/regex : matcher.c parse_rx.c parse_rx.h Log message: [REGEX] reduce the number of charset nodes that are produced Patches: http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/libdm/regex/matcher.c.diff?cvsroot=lvm2&r1=1.7&r2=1.8 http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/libdm/regex/parse_rx.c.diff?cvsroot=lvm2&r1=1.11&r2=1.12 http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/libdm/regex/parse_rx.h.diff?cvsroot=lvm2&r1=1.3&r2=1.4 --- LVM2/libdm/regex/matcher.c 2010/07/20 15:32:07 1.7 +++ LVM2/libdm/regex/matcher.c 2010/07/21 11:58:49 1.8 @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This file is part of the device-mapper userspace tools. @@ -32,8 +32,11 @@ struct dm_regex { /* Instance variables for the lexer */ struct dfa_state *start; unsigned num_nodes; + unsigned num_charsets; int nodes_entered; struct rx_node **nodes; + int charsets_entered; + struct rx_node **charsets; struct dm_pool *scratch, *mem; }; @@ -50,6 +53,33 @@ return r; } +static unsigned _count_charsets(struct rx_node *rx) +{ + if (rx->type == CHARSET) + return 1; + + return (rx->left ? _count_charsets(rx->left) : 0) + + (rx->right ? _count_charsets(rx->right) : 0); +} + +static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i) +{ + if (rx->type == CHARSET) + rx->charset_index = (*i)++; + else { + if (rx->left) + _enumerate_charsets_internal(rx->left, i); + if (rx->right) + _enumerate_charsets_internal(rx->right, i); + } +} + +static void _enumerate_charsets(struct rx_node *rx) +{ + unsigned i = 0; + _enumerate_charsets_internal(rx, &i); +} + static void _fill_table(struct dm_regex *m, struct rx_node *rx) { assert((rx->type != OR) || (rx->left && rx->right)); @@ -61,6 +91,8 @@ _fill_table(m, rx->right); m->nodes[m->nodes_entered++] = rx; + if (rx->type == CHARSET) + m->charsets[m->charsets_entered++] = rx; } static void _create_bitsets(struct dm_regex *m) @@ -69,9 +101,9 @@ for (i = 0; i < m->num_nodes; i++) { struct rx_node *n = m->nodes[i]; - n->firstpos = dm_bitset_create(m->scratch, m->num_nodes); - n->lastpos = dm_bitset_create(m->scratch, m->num_nodes); - n->followpos = dm_bitset_create(m->scratch, m->num_nodes); + n->firstpos = dm_bitset_create(m->scratch, m->num_charsets); + n->lastpos = dm_bitset_create(m->scratch, m->num_charsets); + n->followpos = dm_bitset_create(m->scratch, m->num_charsets); } } @@ -85,7 +117,7 @@ c1 = rx->left; c2 = rx->right; - if (dm_bit(rx->charset, TARGET_TRANS)) + if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS)) rx->final = final++; switch (rx->type) { @@ -125,8 +157,8 @@ break; case CHARSET: - dm_bit_set(rx->firstpos, i); - dm_bit_set(rx->lastpos, i); + dm_bit_set(rx->firstpos, rx->charset_index); + dm_bit_set(rx->lastpos, rx->charset_index); rx->nullable = 0; break; @@ -141,23 +173,21 @@ */ switch (rx->type) { case CAT: - for (j = 0; j < m->num_nodes; j++) { - if (dm_bit(c1->lastpos, j)) { - struct rx_node *n = m->nodes[j]; + for (j = 0; j < m->num_charsets; j++) { + struct rx_node *n = m->charsets[j]; + if (dm_bit(c1->lastpos, j)) dm_bit_union(n->followpos, - n->followpos, c2->firstpos); - } + n->followpos, c2->firstpos); } break; case PLUS: case STAR: - for (j = 0; j < m->num_nodes; j++) { - if (dm_bit(rx->lastpos, j)) { - struct rx_node *n = m->nodes[j]; + for (j = 0; j < m->num_charsets; j++) { + struct rx_node *n = m->charsets[j]; + if (dm_bit(rx->lastpos, j)) dm_bit_union(n->followpos, - n->followpos, rx->firstpos); - } + n->followpos, rx->firstpos); } break; } @@ -189,7 +219,7 @@ static int _calc_states(struct dm_regex *m, struct rx_node *rx) { - unsigned iwidth = (m->num_nodes / DM_BITS_PER_INT) + 1; + unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1; struct ttree *tt = ttree_create(m->scratch, iwidth); struct state_queue *h, *t, *tmp; struct dfa_state *dfa, *ldfa; @@ -199,9 +229,26 @@ if (!tt) return_0; - if (!(bs = dm_bitset_create(m->scratch, m->num_nodes))) + if (!(bs = dm_bitset_create(m->scratch, m->num_charsets))) return_0; + /* build some char maps */ + dm_bitset_t charmap[256]; + for (a = 0; a < 256; a++) { + charmap[a] = dm_bitset_create(m->scratch, m->num_charsets); + if (!charmap[a]) + return_0; + } + + for (i = 0; i < m->num_nodes; i++) { + struct rx_node *n = m->nodes[i]; + if (n->type == CHARSET) { + for (a = dm_bit_get_first(n->charset); + a >= 0; a = dm_bit_get_next(n->charset, a)) + dm_bit_set(charmap[a], n->charset_index); + } + } + /* create first state */ dfa = _create_dfa_state(m->mem); m->start = dfa; @@ -209,8 +256,9 @@ /* prime the queue */ h = t = _create_state_queue(m->scratch, dfa, rx->firstpos); + dm_bitset_t dfa_copy = dm_bitset_create(m->scratch, m->num_charsets); while (h) { - int elems, j, idx[h->bits[0]]; + int elems, idx[h->bits[0]]; /* pop state off front of the queue */ dfa = h->s; @@ -227,18 +275,18 @@ idx[elems++] = i; for (a = 0; a < 256; a++) { + dm_bit_and(dfa_copy, charmap[a], dfa_bits); + /* iterate through all the states in firstpos */ - for (j = 0; j < elems; j++) { - i = idx[j]; - if (dm_bit(m->nodes[i]->charset, a)) { - if (a == TARGET_TRANS) - dfa->final = m->nodes[i]->final; - - dm_bit_union(bs, bs, - m->nodes[i]->followpos); - set_bits = 1; - } - } + for (i = dm_bit_get_first(dfa_copy); + i >= 0; i = dm_bit_get_next(dfa_copy, i)) { + if (a == TARGET_TRANS) + dfa->final = m->charsets[i]->final; + + dm_bit_union(bs, bs, + m->charsets[i]->followpos); + set_bits = 1; + } if (set_bits) { ldfa = ttree_lookup(tt, bs + 1); @@ -314,11 +362,16 @@ m->mem = mem; m->scratch = scratch; m->num_nodes = _count_nodes(rx); + m->num_charsets = _count_charsets(rx); + _enumerate_charsets(rx); m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes); - if (!m->nodes) goto_bad; + m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets); + if (!m->charsets) + goto_bad; + _fill_table(m, rx); _create_bitsets(m); _calc_functions(m); --- LVM2/libdm/regex/parse_rx.c 2010/04/22 23:09:18 1.11 +++ LVM2/libdm/regex/parse_rx.c 2010/07/21 11:58:49 1.12 @@ -284,7 +284,7 @@ struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n)); if (n) { - if (!(n->charset = dm_bitset_create(mem, 256))) { + if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) { dm_pool_free(mem, n); return NULL; } --- LVM2/libdm/regex/parse_rx.h 2010/04/22 23:09:18 1.3 +++ LVM2/libdm/regex/parse_rx.h 2010/07/21 11:58:49 1.4 @@ -41,6 +41,7 @@ struct rx_node *left, *right; /* used to build the dfa for the toker */ + unsigned charset_index; int nullable, final; dm_bitset_t firstpos; dm_bitset_t lastpos;