public inbox for archer-commits@sourceware.org
help / color / mirror / Atom feed
* [SCM] archer-sergiodj-stap: Re-doing the stap argument parser.
@ 2011-02-28 6:45 sergiodj
0 siblings, 0 replies; only message in thread
From: sergiodj @ 2011-02-28 6:45 UTC (permalink / raw)
To: archer-commits
The branch, archer-sergiodj-stap has been updated
via 4483ce291bed02f60661284b4eeed2757d417a0f (commit)
from 87b36e08466e835177114be74445d7ca94d3072c (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email.
- Log -----------------------------------------------------------------
commit 4483ce291bed02f60661284b4eeed2757d417a0f
Author: Sergio Durigan Junior <sergiodj@redhat.com>
Date: Mon Feb 28 03:43:38 2011 -0300
Re-doing the stap argument parser.
I have decided that the "recursive-descent parser" approach was not so
good after all, so I started to code a new, simpler parser based on a
state machine. I still need to code the argument parser itself, but the
new code already deals with the `N@' part of the string.
-----------------------------------------------------------------------
Summary of changes:
gdb/stap-probe.c | 467 ++++++++++-------------------------------------------
1 files changed, 88 insertions(+), 379 deletions(-)
First 500 lines of diff:
diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c
index f05d859..d21a138 100644
--- a/gdb/stap-probe.c
+++ b/gdb/stap-probe.c
@@ -33,408 +33,120 @@
#include <ctype.h>
-
-/* Beginning of the parser code.
-
- Systemtap probes can have argument string (in the form of assembly
- code) "attached" to them. In order to parser this string, we created
- a recursive-descent parser based on GCC's parser. */
-
-/* Lexer token table.
- OP(name, string description): operator tokens
- TK(name, string description): parser tokens. */
-#define TOKEN_TABLE \
- OP(EQ, "=") \
- OP(NOT, "!") \
- OP(GREATER, ">") \
- OP(LESS, "<") \
- OP(GREATER_EQ, ">=") \
- OP(LESS_EQ, "<=") \
- OP(EQ_EQ, "==") \
- OP(NOT_EQ, "!=") \
- OP(NOT_EQ2, "<>") \
- OP(OR, "|") \
- OP(AND, "&") \
- OP(OR_OR, "||") \
- OP(AND_AND, "&&") \
- OP(RSHIFT, ">>") \
- OP(LSHIFT, "<<") \
- OP(PLUS, "+") \
- OP(MINUS, "-") \
- OP(MULT, "*") \
- OP(DIV, "/") \
- OP(MOD, "%") \
- OP(OPEN_PAREN, "(") \
- OP(CLOSE_PAREN, ")") \
- OP(OPEN_SQUARE, "[") \
- OP(CLOSE_SQUARE, "]") \
- OP(AT_SIGN, "@") \
- TK(EOF, NONE)
-
-#define OP(e, s) TTYPE_ ## e,
-#define TK(e, s) TTYPE_ ## e,
-
-typedef enum
-{
- TOKEN_TABLE
- N_TOKEN_TYPES
-} token_type;
-
-#undef OP
-#undef TK
-
-#define OP(e, s) "TTYPE_" # e,
-#define TK(e, s) "TTYPE_" # e,
-
-static const char *token_table_strings[(int) N_TOKEN_TYPES] =
- {
- TOKEN_TABLE
- };
-
-#undef OP
-#undef TK
-
-static struct stap_token *EOF_token;
-
-typedef unsigned char token_value;
-
-struct stap_token
-{
- token_type type;
- token_value *value;
-};
-
-struct stap_token_list
-{
- struct stap_token *token;
- struct stap_token_list *next;
-};
-
-struct stap_parser_buffer
+enum stap_arg_offset
{
- /* The actual buffer holding the input. */
- const char *buf;
-
- /* A pointer to the current lexer location. */
- const char *cur;
+ STAP_ARG_OFFSET_32BIT_UNSIGNED,
+ STAP_ARG_OFFSET_32BIT_SIGNED,
+ STAP_ARG_OFFSET_64BIT_UNSIGNED,
+ STAP_ARG_OFFSET_64BIT_SIGNED,
};
-struct stap_lexer
+struct stap_arg_info
{
- /* Buffer used for lexing. */
- struct stap_parser_buffer buffer;
+ /* The number of this argument. */
+ int number;
- /* The token stream. */
- struct stap_token_list *tokens;
- struct stap_token_list *head;
+ enum stap_arg_offset offset;
+ /* FIXME Add more fields to this struct. */
};
-struct stap_parser
-{
- /* The lexer. */
- struct stap_lexer *lexer;
-};
-
-static struct stap_token *
-new_stap_token (void)
-{
- struct stap_token *t
- = (struct stap_token *) xcalloc (1, sizeof (struct stap_token));
-
- return t;
-}
-
-static void
-stap_lexer_push_token (struct stap_lexer *lexer, struct stap_token *token)
-{
- struct stap_token_list *new
- = (struct stap_token_list *) xmalloc (sizeof (struct stap_token_list));
-
- new->token = token;
- new->next = NULL;
-
- if (lexer->tokens == NULL)
- {
- lexer->tokens = new;
- lexer->head = new;
- }
- else
- {
- struct stap_token_list *l;
-
- for (l = lexer->tokens; l->next != NULL; l = l->next);
-
- l->next = new;
- }
-}
-
-static struct stap_token *
-stap_lexer_peek_token (const struct stap_lexer *lexer)
-{
- if (lexer->tokens == NULL)
- return EOF_token;
-
- return lexer->tokens->token;
-}
-
-#if 0
-static struct stap_token *
-stap_lexer_consume_token (struct stap_lexer *lexer)
-{
- struct stap_token_list *h = lexer->tokens;
-
- lexer->tokens = h->next;
-
- return h->token;
-}
-
-static int
-stap_lexer_next_token_is (struct stap_lexer *lexer, token_type type)
-{
- return stap_lexer_peek_token (lexer)->type == type;
-}
-#endif
-
-static int
-stap_is_eof_token (struct stap_token *token)
-{
- return token == EOF_token;
-}
-
-#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
- do { \
- result->type = ELSE_TYPE; \
- if (*lexer->buffer.cur == CHAR) \
- { \
- ++lexer->buffer.cur; \
- result->type = ELSE_TYPE; \
- } \
- } while (0)
-
-#define IF_NEXT_IS_ELSE_IF(CHAR_THEN, THEN_TYPE, \
- CHAR_ELSE_IF, ELSE_IF_TYPE, \
- ELSE_TYPE) \
- do { \
- result->type = ELSE_TYPE; \
- if (*lexer->buffer.cur == CHAR_THEN) \
- { \
- ++lexer->buffer.cur; \
- result->type = THEN_TYPE; \
- } \
- else if (*lexer->buffer.cur == CHAR_ELSE_IF) \
- { \
- ++lexer->buffer.cur; \
- result->type = ELSE_IF_TYPE; \
- } \
- } while (0)
-
-static struct stap_token *
-stap_lex_one_token (struct stap_lexer *lexer)
-{
- char c;
- struct stap_token *result = new_stap_token ();
-
-retry:
- c = *lexer->buffer.cur++;
-
- switch (c)
- {
- case ' ':
- case '\t':
- case '\n':
- goto retry;
-
- case '=':
- IF_NEXT_IS ('=', TTYPE_EQ_EQ, TTYPE_EQ);
- break;
-
- case '!':
- IF_NEXT_IS ('=', TTYPE_NOT_EQ, TTYPE_NOT);
- break;
-
- case '>':
- IF_NEXT_IS_ELSE_IF ('=', TTYPE_GREATER_EQ,
- '>', TTYPE_RSHIFT,
- TTYPE_GREATER);
- break;
-
- case '<':
- if (*lexer->buffer.cur == '=')
- {
- ++lexer->buffer.cur;
- result->type = TTYPE_LESS_EQ;
- }
- else if (*lexer->buffer.cur == '>')
- {
- ++lexer->buffer.cur;
- result->type = TTYPE_NOT_EQ2;
- }
- else if (*lexer->buffer.cur == '<')
- {
- ++lexer->buffer.cur;
- result->type = TTYPE_LSHIFT;
- }
- else
- result->type = TTYPE_LESS;
- break;
-
- case '|':
- IF_NEXT_IS ('|', TTYPE_OR_OR, TTYPE_OR);
- break;
-
- case '&':
- IF_NEXT_IS ('&', TTYPE_AND_AND, TTYPE_AND);
- break;
-
- case '+':
- result->type = TTYPE_PLUS;
- break;
-
- case '-':
- result->type = TTYPE_MINUS;
- break;
-
- case '*':
- result->type = TTYPE_MULT;
- break;
-
- case '/':
- result->type = TTYPE_DIV;
- break;
-
- case '(':
- result->type = TTYPE_OPEN_PAREN;
- break;
-
- case ')':
- result->type = TTYPE_CLOSE_PAREN;
- break;
-
- case '[':
- result->type = TTYPE_OPEN_SQUARE;
- break;
-
- case ']':
- result->type = TTYPE_CLOSE_SQUARE;
- break;
-
- case '@':
- result->type = TTYPE_AT_SIGN;
- break;
-
- case 0:
- --lexer->buffer.cur;
- xfree (result);
- result = EOF_token;
- break;
-
- default:
- xfree (result);
- return NULL;
- }
-
- return result;
-}
-
-static void
-stap_lex (struct stap_lexer *lexer)
-{
- struct stap_token *token;
-
- do
- {
- token = stap_lex_one_token (lexer);
- stap_lexer_push_token (lexer, token);
- }
- while (!stap_is_eof_token (token));
-}
-
-static struct stap_parser *
-new_stap_parser (const char *start)
-{
- struct stap_parser *parser
- = (struct stap_parser *) xcalloc (1, sizeof (struct stap_parser));
-
- parser->lexer
- = (struct stap_lexer *) xcalloc (1, sizeof (struct stap_lexer));
-
- parser->lexer->buffer.buf = start;
- parser->lexer->buffer.cur = parser->lexer->buffer.buf;
-
- return parser;
-}
-
-static void
-free_stap_parser (void *parser_ptr)
-{
- struct stap_parser *p = (struct stap_parser *) parser_ptr;
-
- /* FIXME: Free tokens. */
-
- xfree (p->lexer);
- xfree (p);
-}
-
-static void
-stap_dump_token_stream (const struct stap_lexer *lexer)
-{
- if (!lexer->tokens)
- printf ("EMPTY!!!!!\n");
- else
- {
- int i;
- struct stap_token_list *l;
-
- for (i = 0, l = lexer->tokens; l; l = l->next)
- {
- struct stap_token *t = l->token;
-
- printf ("\t[%d] token = %s, value = \"%s\"\n", i++,
- token_table_strings[(int) t->type], t->value);
- }
- }
-}
+#define STAP_MAX_ARGS 10
int
stap_parse_probe_arguments (const char *args, int *n_args)
{
- struct stap_parser *parser;
- struct cleanup *c;
+ struct stap_arg_info arg_info[STAP_MAX_ARGS];
+ const char *cur = args;
+ int current_arg = -1;
+ enum
+ {
+ NEW_ARG,
+ OFFSET,
+ PARSE_ARG,
+ } current_state;
*n_args = 0;
- /* No arguments. */
if (!args || !*args)
/* The parsing has not failed, so there is no need to return
zero here. */
return 1;
- parser = new_stap_parser (args);
- c = make_cleanup (free_stap_parser, parser);
-
- stap_lex (parser->lexer);
+ /* Ok, let's start. */
+ current_state = NEW_ARG;
- stap_dump_token_stream (parser->lexer);
-
- /* FIXME: This will probably return an struct expression. */
-// stap_parse_expression (parser);
-
- if (!stap_is_eof_token (stap_lexer_peek_token (parser->lexer)))
+ while (*cur)
{
- /* Error. */
- /* FIXME: Maybe we should be more verbose at reporting error
- situations. Printing a message would be good. complaint()?? */
- do_cleanups (c);
- return 0;
+ switch (current_state)
+ {
+ case NEW_ARG:
+ ++current_arg;
+ if (current_arg >= STAP_MAX_ARGS)
+ /* More args than we can handle. */
+ return 0;
+ current_state = OFFSET;
+ arg_info[current_arg].number = current_arg;
+ break;
+
+ case OFFSET:
+ {
+ enum stap_arg_offset o;
+ int got_minus = 0;
+
+ /* We expect to find something like:
+
+ N@OP
+
+ Where `N' can be [+,-][4,8]. This is not mandatory, so
+ we check it here. If we don't find it, go to the next
+ state. */
+ if ((*cur == '-' && cur[2] != '@')
+ && cur[1] != '@')
+ {
+ current_state = PARSE_ARG;
+ break;
+ }
+
+ if (*cur == '-')
+ {
+ /* Discard the `-'. */
+ ++cur;
+ got_minus = 1;
+ }
+
+ if (*cur == '4')
+ o = got_minus ? STAP_ARG_OFFSET_32BIT_SIGNED
+ : STAP_ARG_OFFSET_32BIT_UNSIGNED;
+ else if (*cur == '8')
+ o = got_minus ? STAP_ARG_OFFSET_64BIT_SIGNED
+ : STAP_ARG_OFFSET_64BIT_UNSIGNED;
+ else
+ /* We have an error, because we don't expect anything
+ except 4 and 8. */
+ return 1;
+
+ arg_info[current_arg].offset = o;
+ /* Discard the number and the `@' sign. */
+ cur += 2;
+ /* Move on. */
+ current_state = PARSE_ARG;
+ }
+ break;
+
+ case PARSE_ARG:
+ /* FIXME THIS IS JUST FOR TESTING */
+ while (*cur++ != ' ');
+ /* Start it over again. */
+ current_state = NEW_ARG;
+ break;
+ }
}
+ *n_args = current_arg + 1;
+
return 1;
}
-
-/* End of the parser code. */
-
-
static void
ep_skip_leading_whitespace (char **s)
hooks/post-receive
--
Repository for Project Archer.
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2011-02-28 6:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-28 6:45 [SCM] archer-sergiodj-stap: Re-doing the stap argument parser sergiodj
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).