From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 20139 invoked by alias); 28 Feb 2011 06:45:50 -0000 Mailing-List: contact archer-commits-help@sourceware.org; run by ezmlm Sender: Precedence: bulk List-Post: List-Help: List-Subscribe: Received: (qmail 20107 invoked by uid 9813); 28 Feb 2011 06:45:49 -0000 Date: Mon, 28 Feb 2011 06:45:00 -0000 Message-ID: <20110228064549.20091.qmail@sourceware.org> From: sergiodj@sourceware.org To: archer-commits@sourceware.org Subject: [SCM] archer-sergiodj-stap: Re-doing the stap argument parser. X-Git-Refname: refs/heads/archer-sergiodj-stap X-Git-Reftype: branch X-Git-Oldrev: 87b36e08466e835177114be74445d7ca94d3072c X-Git-Newrev: 4483ce291bed02f60661284b4eeed2757d417a0f X-SW-Source: 2011-q1/txt/msg00160.txt.bz2 List-Id: The branch, archer-sergiodj-stap has been updated via 4483ce291bed02f60661284b4eeed2757d417a0f (commit) from 87b36e08466e835177114be74445d7ca94d3072c (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email. - Log ----------------------------------------------------------------- commit 4483ce291bed02f60661284b4eeed2757d417a0f Author: Sergio Durigan Junior Date: Mon Feb 28 03:43:38 2011 -0300 Re-doing the stap argument parser. I have decided that the "recursive-descent parser" approach was not so good after all, so I started to code a new parser which is simpler than that, based on state machines. Still need to code the argument parser itself, but it already deals with the `N@' part of the string. ----------------------------------------------------------------------- Summary of changes: gdb/stap-probe.c | 467 ++++++++++------------------------------------------- 1 files changed, 88 insertions(+), 379 deletions(-) First 500 lines of diff: diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c index f05d859..d21a138 100644 --- a/gdb/stap-probe.c +++ b/gdb/stap-probe.c @@ -33,408 +33,120 @@ #include - -/* Beginning of the parser code. - - Systemtap probes can have argument string (in the form of assembly - code) "attached" to them. In order to parser this string, we created - a recursive-descent parser based on GCC's parser. */ - -/* Lexer token table. - OP(name, string description): operator tokens - TK(name, string description): parser tokens. */ -#define TOKEN_TABLE \ - OP(EQ, "=") \ - OP(NOT, "!") \ - OP(GREATER, ">") \ - OP(LESS, "<") \ - OP(GREATER_EQ, ">=") \ - OP(LESS_EQ, "<=") \ - OP(EQ_EQ, "==") \ - OP(NOT_EQ, "!=") \ - OP(NOT_EQ2, "<>") \ - OP(OR, "|") \ - OP(AND, "&") \ - OP(OR_OR, "||") \ - OP(AND_AND, "&&") \ - OP(RSHIFT, ">>") \ - OP(LSHIFT, "<<") \ - OP(PLUS, "+") \ - OP(MINUS, "-") \ - OP(MULT, "*") \ - OP(DIV, "/") \ - OP(MOD, "%") \ - OP(OPEN_PAREN, "(") \ - OP(CLOSE_PAREN, ")") \ - OP(OPEN_SQUARE, "[") \ - OP(CLOSE_SQUARE, "]") \ - OP(AT_SIGN, "@") \ - TK(EOF, NONE) - -#define OP(e, s) TTYPE_ ## e, -#define TK(e, s) TTYPE_ ## e, - -typedef enum -{ - TOKEN_TABLE - N_TOKEN_TYPES -} token_type; - -#undef OP -#undef TK - -#define OP(e, s) "TTYPE_" # e, -#define TK(e, s) "TTYPE_" # e, - -static const char *token_table_strings[(int) N_TOKEN_TYPES] = - { - TOKEN_TABLE - }; - -#undef OP -#undef TK - -static struct stap_token *EOF_token; - -typedef unsigned char token_value; - -struct stap_token -{ - token_type type; - token_value *value; -}; - -struct stap_token_list -{ - struct stap_token *token; - struct stap_token_list *next; -}; - -struct stap_parser_buffer +enum stap_arg_offset { - /* The actual buffer holding the input. */ - const char *buf; - - /* A pointer to the current lexer location. */ - const char *cur; + STAP_ARG_OFFSET_32BIT_UNSIGNED, + STAP_ARG_OFFSET_32BIT_SIGNED, + STAP_ARG_OFFSET_64BIT_UNSIGNED, + STAP_ARG_OFFSET_64BIT_SIGNED, }; -struct stap_lexer +struct stap_arg_info { - /* Buffer used for lexing. */ - struct stap_parser_buffer buffer; + /* The number of this argument. */ + int number; - /* The token stream. */ - struct stap_token_list *tokens; - struct stap_token_list *head; + enum stap_arg_offset offset; + /* FIXME Add more fields to this struct. */ }; -struct stap_parser -{ - /* The lexer. */ - struct stap_lexer *lexer; -}; - -static struct stap_token * -new_stap_token (void) -{ - struct stap_token *t - = (struct stap_token *) xcalloc (1, sizeof (struct stap_token)); - - return t; -} - -static void -stap_lexer_push_token (struct stap_lexer *lexer, struct stap_token *token) -{ - struct stap_token_list *new - = (struct stap_token_list *) xmalloc (sizeof (struct stap_token_list)); - - new->token = token; - new->next = NULL; - - if (lexer->tokens == NULL) - { - lexer->tokens = new; - lexer->head = new; - } - else - { - struct stap_token_list *l; - - for (l = lexer->tokens; l->next != NULL; l = l->next); - - l->next = new; - } -} - -static struct stap_token * -stap_lexer_peek_token (const struct stap_lexer *lexer) -{ - if (lexer->tokens == NULL) - return EOF_token; - - return lexer->tokens->token; -} - -#if 0 -static struct stap_token * -stap_lexer_consume_token (struct stap_lexer *lexer) -{ - struct stap_token_list *h = lexer->tokens; - - lexer->tokens = h->next; - - return h->token; -} - -static int -stap_lexer_next_token_is (struct stap_lexer *lexer, token_type type) -{ - return stap_lexer_peek_token (lexer)->type == type; -} -#endif - -static int -stap_is_eof_token (struct stap_token *token) -{ - return token == EOF_token; -} - -#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ - do { \ - result->type = ELSE_TYPE; \ - if (*lexer->buffer.cur == CHAR) \ - { \ - ++lexer->buffer.cur; \ - result->type = ELSE_TYPE; \ - } \ - } while (0) - -#define IF_NEXT_IS_ELSE_IF(CHAR_THEN, THEN_TYPE, \ - CHAR_ELSE_IF, ELSE_IF_TYPE, \ - ELSE_TYPE) \ - do { \ - result->type = ELSE_TYPE; \ - if (*lexer->buffer.cur == CHAR_THEN) \ - { \ - ++lexer->buffer.cur; \ - result->type = THEN_TYPE; \ - } \ - else if (*lexer->buffer.cur == CHAR_ELSE_IF) \ - { \ - ++lexer->buffer.cur; \ - result->type = ELSE_IF_TYPE; \ - } \ - } while (0) - -static struct stap_token * -stap_lex_one_token (struct stap_lexer *lexer) -{ - char c; - struct stap_token *result = new_stap_token (); - -retry: - c = *lexer->buffer.cur++; - - switch (c) - { - case ' ': - case '\t': - case '\n': - goto retry; - - case '=': - IF_NEXT_IS ('=', TTYPE_EQ_EQ, TTYPE_EQ); - break; - - case '!': - IF_NEXT_IS ('=', TTYPE_NOT_EQ, TTYPE_NOT); - break; - - case '>': - IF_NEXT_IS_ELSE_IF ('=', TTYPE_GREATER_EQ, - '>', TTYPE_RSHIFT, - TTYPE_GREATER); - break; - - case '<': - if (*lexer->buffer.cur == '=') - { - ++lexer->buffer.cur; - result->type = TTYPE_LESS_EQ; - } - else if (*lexer->buffer.cur == '>') - { - ++lexer->buffer.cur; - result->type = TTYPE_NOT_EQ2; - } - else if (*lexer->buffer.cur == '<') - { - ++lexer->buffer.cur; - result->type = TTYPE_LSHIFT; - } - else - result->type = TTYPE_LESS; - break; - - case '|': - IF_NEXT_IS ('|', TTYPE_OR_OR, TTYPE_OR); - break; - - case '&': - IF_NEXT_IS ('&', TTYPE_AND_AND, TTYPE_AND); - break; - - case '+': - result->type = TTYPE_PLUS; - break; - - case '-': - result->type = TTYPE_MINUS; - break; - - case '*': - result->type = TTYPE_MULT; - break; - - case '/': - result->type = TTYPE_DIV; - break; - - case '(': - result->type = TTYPE_OPEN_PAREN; - break; - - case ')': - result->type = TTYPE_CLOSE_PAREN; - break; - - case '[': - result->type = TTYPE_OPEN_SQUARE; - break; - - case ']': - result->type = TTYPE_CLOSE_SQUARE; - break; - - case '@': - result->type = TTYPE_AT_SIGN; - break; - - case 0: - --lexer->buffer.cur; - xfree (result); - result = EOF_token; - break; - - default: - xfree (result); - return NULL; - } - - return result; -} - -static void -stap_lex (struct stap_lexer *lexer) -{ - struct stap_token *token; - - do - { - token = stap_lex_one_token (lexer); - stap_lexer_push_token (lexer, token); - } - while (!stap_is_eof_token (token)); -} - -static struct stap_parser * -new_stap_parser (const char *start) -{ - struct stap_parser *parser - = (struct stap_parser *) xcalloc (1, sizeof (struct stap_parser)); - - parser->lexer - = (struct stap_lexer *) xcalloc (1, sizeof (struct stap_lexer)); - - parser->lexer->buffer.buf = start; - parser->lexer->buffer.cur = parser->lexer->buffer.buf; - - return parser; -} - -static void -free_stap_parser (void *parser_ptr) -{ - struct stap_parser *p = (struct stap_parser *) parser_ptr; - - /* FIXME: Free tokens. */ - - xfree (p->lexer); - xfree (p); -} - -static void -stap_dump_token_stream (const struct stap_lexer *lexer) -{ - if (!lexer->tokens) - printf ("EMPTY!!!!!\n"); - else - { - int i; - struct stap_token_list *l; - - for (i = 0, l = lexer->tokens; l; l = l->next) - { - struct stap_token *t = l->token; - - printf ("\t[%d] token = %s, value = \"%s\"\n", i++, - token_table_strings[(int) t->type], t->value); - } - } -} +#define STAP_MAX_ARGS 10 int stap_parse_probe_arguments (const char *args, int *n_args) { - struct stap_parser *parser; - struct cleanup *c; + struct stap_arg_info arg_info[STAP_MAX_ARGS]; + const char *cur = args; + int current_arg = -1; + enum + { + NEW_ARG, + OFFSET, + PARSE_ARG, + } current_state; *n_args = 0; - /* No arguments. */ if (!args || !*args) /* The parsing has not failed, so there is no need to return zero here. */ return 1; - parser = new_stap_parser (args); - c = make_cleanup (free_stap_parser, parser); - - stap_lex (parser->lexer); + /* Ok, let's start. */ + current_state = NEW_ARG; - stap_dump_token_stream (parser->lexer); - - /* FIXME: This will probably return an struct expression. */ -// stap_parse_expression (parser); - - if (!stap_is_eof_token (stap_lexer_peek_token (parser->lexer))) + while (*cur) { - /* Error. */ - /* FIXME: Maybe we should be more verbose at reporting error - situations. Printing a message would be good. complaint()?? */ - do_cleanups (c); - return 0; + switch (current_state) + { + case NEW_ARG: + ++current_arg; + if (current_arg >= STAP_MAX_ARGS) + /* More args than we can handle. */ + return 0; + current_state = OFFSET; + arg_info[current_arg].number = current_arg; + break; + + case OFFSET: + { + enum stap_arg_offset o; + int got_minus = 0; + + /* We expect to find something like: + + N@OP + + Where `N' can be [+,-][4,8]. This is not mandatory, so + we check it here. If we don't find it, go to the next + state. */ + if ((*cur == '-' && cur[2] != '@') + && cur[1] != '@') + { + current_state = PARSE_ARG; + break; + } + + if (*cur == '-') + { + /* Discard the `-'. */ + ++cur; + got_minus = 1; + } + + if (*cur == '4') + o = got_minus ? STAP_ARG_OFFSET_32BIT_SIGNED + : STAP_ARG_OFFSET_32BIT_UNSIGNED; + else if (*cur == '8') + o = got_minus ? STAP_ARG_OFFSET_64BIT_SIGNED + : STAP_ARG_OFFSET_64BIT_UNSIGNED; + else + /* We have an error, because we don't expect anything + except 4 and 8. */ + return 1; + + arg_info[current_arg].offset = o; + /* Discard the number and the `@' sign. */ + cur += 2; + /* Move on. */ + current_state = PARSE_ARG; + } + break; + + case PARSE_ARG: + /* FIXME THIS IS JUST FOR TESTING */ + while (*cur++ != ' '); + /* Start it over again. */ + current_state = NEW_ARG; + break; + } } + *n_args = current_arg + 1; + return 1; } - -/* End of the parser code. */ - - static void ep_skip_leading_whitespace (char **s) hooks/post-receive -- Repository for Project Archer.