[SCM] archer-sergiodj-stap: Re-doing the stap argument parser.

public inbox for archer-commits@sourceware.org
help / color / mirror / Atom feed

* [SCM]  archer-sergiodj-stap: Re-doing the stap argument parser.
@ 2011-02-28  6:45 sergiodj
  0 siblings, 0 replies; only message in thread
From: sergiodj @ 2011-02-28  6:45 UTC (permalink / raw)
  To: archer-commits

The branch, archer-sergiodj-stap has been updated
       via  4483ce291bed02f60661284b4eeed2757d417a0f (commit)
      from  87b36e08466e835177114be74445d7ca94d3072c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit 4483ce291bed02f60661284b4eeed2757d417a0f
Author: Sergio Durigan Junior <sergiodj@redhat.com>
Date:   Mon Feb 28 03:43:38 2011 -0300

    Re-doing the stap argument parser.
    
    I have decided that the "recursive-descent parser" approach was not so
    good after all, so I started writing a new, simpler parser based on a
    state machine.  I still need to write the argument parser itself, but
    the new code already handles the `N@' part of the string.

-----------------------------------------------------------------------

Summary of changes:
 gdb/stap-probe.c |  467 ++++++++++-------------------------------------------
 1 files changed, 88 insertions(+), 379 deletions(-)

First 500 lines of diff:
diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c
index f05d859..d21a138 100644
--- a/gdb/stap-probe.c
+++ b/gdb/stap-probe.c
@@ -33,408 +33,120 @@
 
 #include <ctype.h>
 
-
-/* Beginning of the parser code.
-
-   Systemtap probes can have argument string (in the form of assembly
-   code) "attached" to them.  In order to parser this string, we created
-   a recursive-descent parser based on GCC's parser.  */
-
-/* Lexer token table.
-   OP(name, string description): operator tokens
-   TK(name, string description): parser tokens.  */
-#define TOKEN_TABLE		    \
-  OP(EQ,		"=")	    \
-  OP(NOT,		"!")	    \
-  OP(GREATER,		">")	    \
-  OP(LESS,		"<")	    \
-  OP(GREATER_EQ,	">=")	    \
-  OP(LESS_EQ,		"<=")	    \
-  OP(EQ_EQ,		"==")	    \
-  OP(NOT_EQ,		"!=")	    \
-  OP(NOT_EQ2,		"<>")	    \
-  OP(OR,		"|")	    \
-  OP(AND,		"&")	    \
-  OP(OR_OR,		"||")	    \
-  OP(AND_AND,		"&&")	    \
-  OP(RSHIFT,		">>")	    \
-  OP(LSHIFT,		"<<")	    \
-  OP(PLUS,		"+")	    \
-  OP(MINUS,		"-")	    \
-  OP(MULT,		"*")	    \
-  OP(DIV,		"/")	    \
-  OP(MOD,		"%")	    \
-  OP(OPEN_PAREN,	"(")	    \
-  OP(CLOSE_PAREN,	")")	    \
-  OP(OPEN_SQUARE,	"[")	    \
-  OP(CLOSE_SQUARE,	"]")	    \
-  OP(AT_SIGN,		"@")	    \
-    TK(EOF,		NONE)
-
-#define OP(e, s) TTYPE_ ## e,
-#define TK(e, s) TTYPE_ ## e,
-
-typedef enum
-{
-  TOKEN_TABLE
-  N_TOKEN_TYPES
-} token_type;
-
-#undef OP
-#undef TK
-
-#define OP(e, s) "TTYPE_" # e,
-#define TK(e, s) "TTYPE_" # e,
-
-static const char *token_table_strings[(int) N_TOKEN_TYPES] =
-  {
-    TOKEN_TABLE
-  };
-
-#undef OP
-#undef TK
-
-static struct stap_token *EOF_token;
-
-typedef unsigned char token_value;
-
-struct stap_token
-{
-  token_type type;
-  token_value *value;
-};
-
-struct stap_token_list
-{
-  struct stap_token *token;
-  struct stap_token_list *next;
-};
-
-struct stap_parser_buffer
+enum stap_arg_offset
 {
-  /* The actual buffer holding the input.  */
-  const char *buf;
-
-  /* A pointer to the current lexer location.  */
-  const char *cur;
+  STAP_ARG_OFFSET_32BIT_UNSIGNED,
+  STAP_ARG_OFFSET_32BIT_SIGNED,
+  STAP_ARG_OFFSET_64BIT_UNSIGNED,
+  STAP_ARG_OFFSET_64BIT_SIGNED,
 };
 
-struct stap_lexer
+struct stap_arg_info
 {
-  /* Buffer used for lexing.  */
-  struct stap_parser_buffer buffer;
+  /* The number of this argument.  */
+  int number;
 
-  /* The token stream.  */
-  struct stap_token_list *tokens;
-  struct stap_token_list *head;
+  enum stap_arg_offset offset;
+  /* FIXME Add more fields to this struct.  */
 };
 
-struct stap_parser
-{
-  /* The lexer.  */
-  struct stap_lexer *lexer;
-};
-
-static struct stap_token *
-new_stap_token (void)
-{
-  struct stap_token *t
-    = (struct stap_token *) xcalloc (1, sizeof (struct stap_token));
-
-  return t;
-}
-
-static void
-stap_lexer_push_token (struct stap_lexer *lexer, struct stap_token *token)
-{
-  struct stap_token_list *new
-    = (struct stap_token_list *) xmalloc (sizeof (struct stap_token_list));
-
-  new->token = token;
-  new->next = NULL;
-
-  if (lexer->tokens == NULL)
-    {
-      lexer->tokens = new;
-      lexer->head = new;
-    }
-  else
-    {
-      struct stap_token_list *l;
-
-      for (l = lexer->tokens; l->next != NULL; l = l->next);
-
-      l->next = new;
-    }
-}
-
-static struct stap_token *
-stap_lexer_peek_token (const struct stap_lexer *lexer)
-{
-  if (lexer->tokens == NULL)
-    return EOF_token;
-
-  return lexer->tokens->token;
-}
-
-#if 0
-static struct stap_token *
-stap_lexer_consume_token (struct stap_lexer *lexer)
-{
-  struct stap_token_list *h = lexer->tokens;
-
-  lexer->tokens = h->next;
-
-  return h->token;
-}
-
-static int
-stap_lexer_next_token_is (struct stap_lexer *lexer, token_type type)
-{
-  return stap_lexer_peek_token (lexer)->type == type;
-}
-#endif
-
-static int
-stap_is_eof_token (struct stap_token *token)
-{
-  return token == EOF_token;
-}
-
-#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)	\
-  do {						\
-      result->type = ELSE_TYPE;			\
-      if (*lexer->buffer.cur == CHAR)		\
-	{					\
-	  ++lexer->buffer.cur;			\
-	  result->type = ELSE_TYPE;		\
-	}					\
-  } while (0)
-
-#define IF_NEXT_IS_ELSE_IF(CHAR_THEN, THEN_TYPE,	\
-			   CHAR_ELSE_IF, ELSE_IF_TYPE,	\
-			   ELSE_TYPE)			\
-  do {							\
-      result->type = ELSE_TYPE;				\
-      if (*lexer->buffer.cur == CHAR_THEN)		\
-	{						\
-	  ++lexer->buffer.cur;				\
-	  result->type = THEN_TYPE;			\
-	}						\
-      else if (*lexer->buffer.cur == CHAR_ELSE_IF)	\
-	{						\
-	  ++lexer->buffer.cur;				\
-	  result->type = ELSE_IF_TYPE;			\
-	}						\
-  } while (0)
-
-static struct stap_token *
-stap_lex_one_token (struct stap_lexer *lexer)
-{
-  char c;
-  struct stap_token *result = new_stap_token ();
-
-retry:
-  c = *lexer->buffer.cur++;
-
-  switch (c)
-    {
-    case ' ':
-    case '\t':
-    case '\n':
-      goto retry;
-
-    case '=':
-      IF_NEXT_IS ('=', TTYPE_EQ_EQ, TTYPE_EQ);
-      break;
-
-    case '!':
-      IF_NEXT_IS ('=', TTYPE_NOT_EQ, TTYPE_NOT);
-      break;
-
-    case '>':
-      IF_NEXT_IS_ELSE_IF ('=', TTYPE_GREATER_EQ,
-			  '>', TTYPE_RSHIFT,
-			  TTYPE_GREATER);
-      break;
-
-    case '<':
-      if (*lexer->buffer.cur == '=')
-	{
-	  ++lexer->buffer.cur;
-	  result->type = TTYPE_LESS_EQ;
-	}
-      else if (*lexer->buffer.cur == '>')
-	{
-	  ++lexer->buffer.cur;
-	  result->type = TTYPE_NOT_EQ2;
-	}
-      else if (*lexer->buffer.cur == '<')
-	{
-	  ++lexer->buffer.cur;
-	  result->type = TTYPE_LSHIFT;
-	}
-      else
-	result->type = TTYPE_LESS;
-      break;
-
-    case '|':
-      IF_NEXT_IS ('|', TTYPE_OR_OR, TTYPE_OR);
-      break;
-
-    case '&':
-      IF_NEXT_IS ('&', TTYPE_AND_AND, TTYPE_AND);
-      break;
-
-    case '+':
-      result->type = TTYPE_PLUS;
-      break;
-
-    case '-':
-      result->type = TTYPE_MINUS;
-      break;
-
-    case '*':
-      result->type = TTYPE_MULT;
-      break;
-
-    case '/':
-      result->type = TTYPE_DIV;
-      break;
-
-    case '(':
-      result->type = TTYPE_OPEN_PAREN;
-      break;
-
-    case ')':
-      result->type = TTYPE_CLOSE_PAREN;
-      break;
-
-    case '[':
-      result->type = TTYPE_OPEN_SQUARE;
-      break;
-
-    case ']':
-      result->type = TTYPE_CLOSE_SQUARE;
-      break;
-
-    case '@':
-      result->type = TTYPE_AT_SIGN;
-      break;
-
-    case 0:
-      --lexer->buffer.cur;
-      xfree (result);
-      result = EOF_token;
-      break;
-
-    default:
-      xfree (result);
-      return NULL;
-    }
-
-  return result;
-}
-
-static void
-stap_lex (struct stap_lexer *lexer)
-{
-  struct stap_token *token;
-
-  do
-    {
-      token = stap_lex_one_token (lexer);
-      stap_lexer_push_token (lexer, token);
-    }
-  while (!stap_is_eof_token (token));
-}
-
-static struct stap_parser *
-new_stap_parser (const char *start)
-{
-  struct stap_parser *parser
-    = (struct stap_parser *) xcalloc (1, sizeof (struct stap_parser));
-
-  parser->lexer
-    = (struct stap_lexer *) xcalloc (1, sizeof (struct stap_lexer));
-
-  parser->lexer->buffer.buf = start;
-  parser->lexer->buffer.cur = parser->lexer->buffer.buf;
-
-  return parser;
-}
-
-static void
-free_stap_parser (void *parser_ptr)
-{
-  struct stap_parser *p = (struct stap_parser *) parser_ptr;
-
-  /* FIXME: Free tokens.  */
-
-  xfree (p->lexer);
-  xfree (p);
-}
-
-static void
-stap_dump_token_stream (const struct stap_lexer *lexer)
-{
-  if (!lexer->tokens)
-    printf ("EMPTY!!!!!\n");
-  else
-    {
-      int i;
-      struct stap_token_list *l;
-
-      for (i = 0, l = lexer->tokens; l; l = l->next)
-	{
-	  struct stap_token *t = l->token;
-
-	  printf ("\t[%d] token = %s, value = \"%s\"\n", i++,
-		  token_table_strings[(int) t->type], t->value);
-	}
-    }
-}
+#define STAP_MAX_ARGS 10
 
 int
 stap_parse_probe_arguments (const char *args, int *n_args)
 {
-  struct stap_parser *parser;
-  struct cleanup *c;
+  struct stap_arg_info arg_info[STAP_MAX_ARGS];
+  const char *cur = args;
+  int current_arg = -1;
+  enum
+    {
+      NEW_ARG,
+      OFFSET,
+      PARSE_ARG,
+    } current_state;
 
   *n_args = 0;
 
-  /* No arguments.  */
   if (!args || !*args)
     /* The parsing has not failed, so there is no need to return
        zero here.  */
     return 1;
 
-  parser = new_stap_parser (args);
-  c = make_cleanup (free_stap_parser, parser);
-
-  stap_lex (parser->lexer);
+  /* Ok, let's start.  */
+  current_state = NEW_ARG;
 
-  stap_dump_token_stream (parser->lexer);
-
-  /* FIXME: This will probably return an struct expression.  */
-//  stap_parse_expression (parser);
-
-  if (!stap_is_eof_token (stap_lexer_peek_token (parser->lexer)))
+  while (*cur)
     {
-      /* Error.  */
-      /* FIXME: Maybe we should be more verbose at reporting error
-	 situations.  Printing a message would be good.  complaint()??  */
-      do_cleanups (c);
-      return 0;
+      switch (current_state)
+	{
+	case NEW_ARG:
+	  ++current_arg;
+	  if (current_arg >= STAP_MAX_ARGS)
+	    /* More args than we can handle.  */
+	    return 0;
+	  current_state = OFFSET;
+	  arg_info[current_arg].number = current_arg;
+	  break;
+
+	case OFFSET:
+	    {
+	      enum stap_arg_offset o;
+	      int got_minus = 0;
+
+	      /* We expect to find something like:
+
+		 N@OP
+
+		 Where `N' can be [+,-][4,8].  This is not mandatory, so
+		 we check it here.  If we don't find it, go to the next
+		 state.  */
+	      if ((*cur == '-' && cur[2] != '@')
+		  && cur[1] != '@')
+		{
+		  current_state = PARSE_ARG;
+		  break;
+		}
+
+	      if (*cur == '-')
+		{
+		  /* Discard the `-'.  */
+		  ++cur;
+		  got_minus = 1;
+		}
+
+	      if (*cur == '4')
+		o = got_minus ? STAP_ARG_OFFSET_32BIT_SIGNED
+		  : STAP_ARG_OFFSET_32BIT_UNSIGNED;
+	      else if (*cur == '8')
+		o = got_minus ? STAP_ARG_OFFSET_64BIT_SIGNED
+		  : STAP_ARG_OFFSET_64BIT_UNSIGNED;
+	      else
+		/* We have an error, because we don't expect anything
+		   except 4 and 8.  */
+		return 1;
+
+	      arg_info[current_arg].offset = o;
+	      /* Discard the number and the `@' sign.  */
+	      cur += 2;
+	      /* Move on.  */
+	      current_state = PARSE_ARG;
+	    }
+	  break;
+
+	case PARSE_ARG:
+	  /* FIXME THIS IS JUST FOR TESTING  */
+	  while (*cur++ != ' ');
+	  /* Start it over again.  */
+	  current_state = NEW_ARG;
+	  break;
+	}
     }
 
+  *n_args = current_arg + 1;
+
   return 1;
 }
 
-
-/* End of the parser code.  */
-
-
 static void
 ep_skip_leading_whitespace (char **s)


hooks/post-receive
--
Repository for Project Archer.


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-02-28  6:45 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-02-28  6:45 [SCM] archer-sergiodj-stap: Re-doing the stap argument parser sergiodj

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).