public inbox for frysk-cvs@sourceware.org
help / color / mirror / Atom feed
* [SCM]  master: Introduce options FQ identifier parser
@ 2008-06-09 14:37 pmachata
  0 siblings, 0 replies; only message in thread
From: pmachata @ 2008-06-09 14:37 UTC (permalink / raw)
  To: frysk-cvs

The branch, master has been updated
       via  e72cddb17cbe725315d59b04682073d5d865efb0 (commit)
       via  054f5813c28e637c4fc79c289c2e2111b31b46ab (commit)
      from  4baf67699b3de5a88098a8eec820593ae4d153f2 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email.

- Log -----------------------------------------------------------------
commit e72cddb17cbe725315d59b04682073d5d865efb0
Author: Petr Machata <pmachata@redhat.com>
Date:   Mon Jun 9 16:33:16 2008 +0200

    Introduce options FQ identifier parser

commit 054f5813c28e637c4fc79c289c2e2111b31b46ab
Author: Petr Machata <pmachata@redhat.com>
Date:   Mon Jun 9 14:47:34 2008 +0200

    Move FQ identifier parser to the class of its own

-----------------------------------------------------------------------

Summary of changes:
 frysk-core/frysk/bindir/ftrace.java      |    8 +-
 frysk-core/frysk/expr/CExpr.g            |  283 +---------------------
 frysk-core/frysk/expr/ChangeLog          |   10 +
 frysk-core/frysk/expr/FQIdentParser.java |  394 ++++++++++++++++++++++++++++++
 4 files changed, 414 insertions(+), 281 deletions(-)
 create mode 100644 frysk-core/frysk/expr/FQIdentParser.java

First 500 lines of diff:
diff --git a/frysk-core/frysk/bindir/ftrace.java b/frysk-core/frysk/bindir/ftrace.java
index 1ac4f89..86cb8fc 100644
--- a/frysk-core/frysk/bindir/ftrace.java
+++ b/frysk-core/frysk/bindir/ftrace.java
@@ -53,7 +53,7 @@ import gnu.classpath.tools.getopt.OptionGroup;
 import inua.util.PrintWriter;
 
 import frysk.debuginfo.PrintStackOptions;
-import frysk.expr.CExprLexer;
+import frysk.expr.FQIdentParser;
 import frysk.expr.FQIdentifier;
 import frysk.ftrace.AddrRule;
 import frysk.ftrace.Ftrace;
@@ -149,14 +149,14 @@ class ftrace {
 				 RuleOptions options, Collection rules) {
 
 		    try {
-			FQIdentifier fqid = CExprLexer.parseFQIdentifier(str);
+			FQIdentifier fqid = FQIdentParser.parseFtraceIdentifier(str);
 			rules.add(new SymbolRule(addition, options, fqid));
 		    }
-		    catch (CExprLexer.FQIdentExtraGarbageException exc) {
+		    catch (FQIdentParser.ExtraGarbageException exc) {
 			warning.log("Ignoring garbage after the end of the symbol rule",
 				    exc.getMessage());
 		    }
-		    catch (CExprLexer.FQIdentInvalidTokenException exc) {
+		    catch (FQIdentParser.InvalidTokenException exc) {
 			warning.log("Invalid symbol rule", exc.getMessage());
 		    }
 		}
diff --git a/frysk-core/frysk/expr/CExpr.g b/frysk-core/frysk/expr/CExpr.g
index 587dca2..3ea45b6 100644
--- a/frysk-core/frysk/expr/CExpr.g
+++ b/frysk-core/frysk/expr/CExpr.g
@@ -1,6 +1,6 @@
 // This file is part of the program FRYSK.
 //
-// Copyright 2005, 2007 Red Hat Inc.
+// Copyright 2005, 2007, 2008 Red Hat Inc.
 //
 // FRYSK is free software; you can redistribute it and/or modify it
 // under the terms of the GNU General Public License as published by
@@ -79,10 +79,6 @@ header
 // version and license this file solely under the GPL without
 // exception.
     package frysk.expr;
-
-    import java.util.regex.Pattern;
-    import java.util.regex.Matcher;
-    import java.io.StringReader;
 }
 
 class CExprParser extends Parser;
@@ -419,272 +415,8 @@ tokens
 }
 
 {
-    private String fqinit;
-
-    private char fqLA(int i) throws CharStreamException {
-        if (i >= fqinit.length())
-            return LA(i - fqinit.length() + 1);
-        else
-            return fqinit.charAt(i);
-    }
-
-    private void fqmatch(String s) throws MismatchedCharException, CharStreamException {
-        while (fqinit.length() > 0) {
-            char c = s.charAt(0);
-            char d = fqinit.charAt(0);
-            if (c != d)
-                throw new MismatchedCharException(d, c, false, this);
-            s = s.substring(1);
-            fqinit = fqinit.substring(1);
-        }
-        super.match(s);
-    }
-
-    private Token parseFQIdentifier()
-        throws RecognitionException, CharStreamException, TokenStreamException
-    {
-
-        /*
-         * Funky HPD #-syntax doesn't map very well to LL-k type parser (for
-         * constant 'k').  When written directly, we get lots of lexical
-         * ambiguities.  We work around that by doing arbitrary manual
-         * look-ahead and just parsing the tokens ourselves.  Any whitespace
-         * or EOF stops the lookahead.
-         */
-
-        String matched = "";
-        String part = "";
-
-        String partDso = null;
-        String partFile = null;
-        String partProc = null;
-        String partLine = null;
-        String partProcessId = null;
-        String partThreadId = null;
-        String partFrameNum = null;
-
-        int i = 0;
-        char c;
-
-        // Automaton state is composed of following sub-states:
-        final int FILE = 1;
-        final int LINE = 2;
-        final int SYMB = 4;
-        int allowed = LINE | SYMB;
-
-        if ((c = fqLA(0)) == '#'
-            || c == '[') {
-
-            char term = (c == '[') ? ']' : '#';
-            String context = (c == '[') ? "dynamic context" : "DSO part";
-
-            matched += c;
-            i++;
-            while (true) {
-                c = fqLA(i++);
-                matched += c;
-                if (Character.isWhitespace(c) || c == EOF_CHAR)
-                    throw new RecognitionException("Nonterminated " + context
-                                                   + " `" + matched
-                                                   + "' in fully qualified notation.");
-                else if (c == term)
-                    break;
-                part += c;
-            }
-
-            if (part.length() == 0)
-                throw new RecognitionException("Empty " + context
-                                               + " `" + matched
-                                               + "' in fully qualified notation.");
-
-            if (term == ']') {
-                Matcher m = Pattern.compile("[0-9]+\\.[0-9]+#[0-9]+").matcher(part);
-                if (!m.matches())
-                    return null;
-
-                int hash = part.indexOf('#');
-                int dot = part.indexOf('.');
-                partProcessId = part.substring(0, dot);
-                partThreadId = part.substring(dot + 1, hash);
-                partFrameNum = part.substring(hash + 1);
-                part = "";
-                allowed = SYMB;
-
-            } else {
-                partDso = part;
-                part = "";
-            }
-
-        }
-
-        int state = allowed;
-
-        loop: while(true) {
-            c = fqLA(i++);
-            if (Character.isWhitespace(c) || c == EOF_CHAR)
-                break;
-
-            matched += c;
-            part += c;
-            switch (c) {
-                case '.': {
-                    state |= FILE;
-                    state &= ~SYMB;
-                    break;
-                }
-
-                case '#': {
-                    if (partLine == null && partProc == null
-                        && partProcessId == null) {
-
-                        if ((state & FILE) != 0 && partFile == null)
-                            partFile = part.substring(0, part.length() - 1);
-                        else if ((state & LINE) != 0)
-                            partLine = part.substring(0, part.length() - 1);
-                        else if ((state & SYMB) != 0) {
-                            partProc = part.substring(0, part.length() - 1);
-                            if (!Character.isJavaIdentifierStart(partProc.charAt(0)))
-                                throw new RecognitionException("Procedure part (`" + partProc + "') in fully "
-                                                               + "qualified notation has to be valid identifier.");
-                        } else
-                            // This # could belong to the next symbol.
-                            // Break out and try to match the initial sequence.
-                            break loop;
-                    } else
-                        throw new RecognitionException("Unexpected `#' after line or proc name was defined.");
-
-                    state = allowed & SYMB;
-                    if (partLine == null && partProc == null)
-                        state |= allowed & LINE;
-                    part = "";
-                    break;
-                }
-
-                default: {
-                    if (!(c >= '0' && c <= '9')) {
-                        state &= ~LINE;
-
-                        if (!(Character.isJavaIdentifierStart(c)
-                              || c == '@'
-                              || (c == ':' && part.length() == 4
-                                  && part.equals("plt:")))) {
-
-                            // Break out early if we are already
-                            // just waiting for symbol.
-                            if (partLine != null || partProc != null
-                                || partProcessId != null)
-                                break loop;
-                            else
-                                state &= ~SYMB;
-                        }
-                    }
-                }
-            }
-        }
-
-        // ((state & SYMB) == 0) here means that we've parsed more
-        // than a symbol name, in hope it would turn out to be a
-        // file name (e.g. hello-world.c#symbol as a symbol
-        // reference vs. hello-world.c as an expression involving
-        // subtraction and struct access).  In following, we take
-        // care not to consume anything that's not an identifier.
-        // E.g. when the user types "a+b", we want to match
-        // only identifier "a".
-
-        boolean wantPlt = false;
-        if (part.startsWith("plt:")) {
-            wantPlt = true;
-            part = part.substring(4);
-        }
-
-        int v = part.indexOf('@');
-        String version = null;
-        if (v >= 0) {
-            version = part.substring(v + 1);
-            part = part.substring(0, v);
-        }
-
-        // This is deliberately simplified and ignores request for initial letter.
-        // This is for better error reporting below, we first snip off irrelevant
-        // parts before yelling at user that his identifier sucks.
-        Matcher m = Pattern.compile("[a-zA-Z0-9_$]+").matcher(part);
-        if (m.lookingAt()) {
-            int diff = part.length() - m.end();
-            if (diff > 0) {
-                matched = matched.substring(0, matched.length() - diff);
-                part = part.substring(0, m.end());
-            }
-        }
-        else
-            throw new RecognitionException("Expected symbol name, got `" + part + "'.");
-
-        if (!Character.isJavaIdentifierStart(part.charAt(0)))
-            throw new RecognitionException("Invalid symbol `" + part + "'.");
-
-        FQIdentToken tok = new FQIdentToken(IDENT, matched);
-        tok.dso = partDso;
-        tok.file = partFile;
-        tok.line = partLine;
-        tok.proc = partProc;
-        tok.symbol = part;
-        tok.version = version;
-        tok.wantPlt = wantPlt;
-        tok.processId = partProcessId;
-        tok.threadId = partThreadId;
-        tok.frameNumber = partFrameNum;
-        tok.setLine(getLine());
-
-        fqmatch(matched);
-        tok.setColumn(getColumn() - matched.length());
-
-        return tok;
-    }
-
-    public static class FQIdentException extends RuntimeException {
-        private static final long serialVersionUID = 1L;
-        public FQIdentException(String s) {
-            super(s);
-        }
-    }
-
-    public static class FQIdentExtraGarbageException extends FQIdentException {
-        private static final long serialVersionUID = 1L;
-        public FQIdentExtraGarbageException(String garbage) {
-            super(garbage);
-        }
-    }
-
-    public static class FQIdentInvalidTokenException extends FQIdentException {
-        private static final long serialVersionUID = 1L;
-        public FQIdentInvalidTokenException(String token) {
-            super(token);
-        }
-    }
-
-    public static FQIdentifier parseFQIdentifier(String str)
-        throws FQIdentExtraGarbageException, FQIdentInvalidTokenException
-    {
-        StringReader r = new StringReader(str);
-        CExprLexer lexer = new CExprLexer(r);
-        Token tok;
-
-        try {
-            tok = lexer.nextToken();
-
-            if (!(tok instanceof FQIdentToken))
-                throw new FQIdentInvalidTokenException(tok.getText());
-
-            FQIdentToken fqTok = (FQIdentToken)tok;
-
-            if ((tok = lexer.nextToken()).getType() != Token.EOF_TYPE)
-                throw new FQIdentExtraGarbageException(tok.getText());
-
-            return new FQIdentifier(fqTok);
-
-        } catch (antlr.TokenStreamException exc) {
-            throw new FQIdentInvalidTokenException(str);
-        }
-    }
+    final FQIdentParser fqIdParser
+        = new FQIdentParser(this, true, false, true);
 }
 
 AMPERSAND       : '&' ;
@@ -710,9 +442,8 @@ LESSTHAN        : "<" ;
 LESSTHANOREQUALTO     : "<=" ;
 LPAREN          : '('   ;
 LSQUARE         : '[' (('0'..'9') {
-                      fqinit = $getText;
                       try {
-                          Token tok = parseFQIdentifier();
+                          Token tok = fqIdParser.parse($getText);
                           if (tok != null) {
                               $setToken(tok);
                               $setType(IDENT);
@@ -752,8 +483,7 @@ ELLIPSIS  : "..." ;
 protected
 IDENT
     : ('$'|'#'|'a'..'z'|'A'..'Z'|'_') {
-          fqinit = $getText;
-          $setToken(parseFQIdentifier());
+          $setToken(fqIdParser.parse($getText));
       } ;
 
 /**
@@ -905,9 +635,8 @@ NUM
 			)?
 		|	(('1'..'9') ('0'..'9')* {_ttype = DECIMALINT;})
              ( '#' {
-                   fqinit = $getText;
                    $setType(IDENT);
-                   $setToken(parseFQIdentifier());
+                   $setToken(fqIdParser.parse($getText));
                } )?
 		)
 		(	('l'|'L') { _ttype = DECIMALINT; }
diff --git a/frysk-core/frysk/expr/ChangeLog b/frysk-core/frysk/expr/ChangeLog
index d07e970..9b25bfc 100644
--- a/frysk-core/frysk/expr/ChangeLog
+++ b/frysk-core/frysk/expr/ChangeLog
@@ -1,3 +1,13 @@
+2008-06-09  Petr Machata  <pmachata@redhat.com>
+
+	* FQIdentParser.java: Introduce parsing options.
+	(static parseFQIdentifier): Don't create the lexer at all.
+
+2008-06-09  Petr Machata  <pmachata@redhat.com>
+
+	* CExpr.g: Cut FQ identifier parser out...
+	* FQIdentParser.java: ... and move here.
+
 2008-06-06  Andrew Cagney  <cagney@redhat.com>
 
 	* ExprSearchEngine.java: Let DwflDie adjust for module bias.
diff --git a/frysk-core/frysk/expr/FQIdentParser.java b/frysk-core/frysk/expr/FQIdentParser.java
new file mode 100644
index 0000000..63035b9
--- /dev/null
+++ b/frysk-core/frysk/expr/FQIdentParser.java
@@ -0,0 +1,394 @@
+// This file is part of the program FRYSK.
+//
+// Copyright 2008 Red Hat Inc.
+//
+// FRYSK is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by
+// the Free Software Foundation; version 2 of the License.
+//
+// FRYSK is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with FRYSK; if not, write to the Free Software Foundation,
+// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+// 
+// In addition, as a special exception, Red Hat, Inc. gives You the
+// additional right to link the code of FRYSK with code not covered
+// under the GNU General Public License ("Non-GPL Code") and to
+// distribute linked combinations including the two, subject to the
+// limitations in this paragraph. Non-GPL Code permitted under this
+// exception must only link to the code of FRYSK through those well
+// defined interfaces identified in the file named EXCEPTION found in
+// the source code files (the "Approved Interfaces"). The files of
+// Non-GPL Code may instantiate templates or use macros or inline
+// functions from the Approved Interfaces without causing the
+// resulting work to be covered by the GNU General Public
+// License. Only Red Hat, Inc. may make changes or additions to the
+// list of Approved Interfaces. You must obey the GNU General Public
+// License in all respects for all of the FRYSK code and other code
+// used in conjunction with FRYSK except the Non-GPL Code covered by
+// this exception. If you modify this file, you may extend this
+// exception to your version of the file, but you are not obligated to
+// do so. If you do not wish to provide this exception without
+// modification, you must delete this exception statement from your
+// version and license this file solely under the GPL without
+// exception.
+
+package frysk.expr;
+
+import java.io.StringReader;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import antlr.CharScanner;
+import antlr.CharStreamException;
+import antlr.MismatchedCharException;
+import antlr.RecognitionException;
+import antlr.Token;
+import antlr.TokenStreamException;
+import antlr.InputBuffer;
+import antlr.CharBuffer;
+
+public class FQIdentParser {
+
+    private int i;
+    private String fqinit;
+    private final CharScanner scanner;
+    private final boolean allowDynamic;
+    private final boolean allowGlobs;
+    private final boolean expectMoreTokens;
+
+    /**
+     * @param allowDynamic Whether the [pid.tid#frame] portion of the
+     *        FQ syntax makes sense in given context.  For example it
+     *        doesn't for ftrace, but in general does for hpd.
+     *
+     * @param allowGlobs Whether globs should be allowed.  This
+     *        changes syntax of symbol portion of FQ identifier, which
+     *        becomes essentially unrestricted.  Note that if globs
+     *        are allowed, simple expressions such as "a*b" are no
+     *        longer parsed as three tokens, but become one glob
+     *        symbol name.
+     *
+     * @param expectMoreTokens Whether whitespace terminates
+     *        lookahead.  When no more tokens are expected, it
+     *        doesn't.
+     */
+    FQIdentParser(CharScanner scanner,
+		  boolean allowDynamic,
+		  boolean allowGlobs,
+		  boolean expectMoreTokens) {
+
+	this.scanner = scanner;
+	this.allowDynamic = allowDynamic;
+	this.allowGlobs = allowGlobs;
+	this.expectMoreTokens = expectMoreTokens;
+    }
+
+    private char fqLA(int i) throws CharStreamException {
+	if (i >= fqinit.length())
+	    return scanner.LA(i - fqinit.length() + 1);
+	else
+	    return fqinit.charAt(i);
+    }
+
+    private void fqmatch(String s) throws MismatchedCharException, CharStreamException {
+	while (fqinit.length() > 0) {
+	    char c = s.charAt(0);
+	    char d = fqinit.charAt(0);
+	    if (c != d)
+		throw new MismatchedCharException(d, c, false, scanner);
+	    s = s.substring(1);
+	    fqinit = fqinit.substring(1);
+	}
+	scanner.match(s);
+    }
+
+    private String maybeParsePrefix(char start, char end, String context)
+	throws RecognitionException, CharStreamException
+    {
+	char c = fqLA(0);
+        if (c != start)
+	    return null;


hooks/post-receive
--
frysk system monitor/debugger


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2008-06-09 14:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-09 14:37 [SCM] master: Introduce options FQ identifier parser pmachata

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).