From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 15952 invoked by alias); 4 Jun 2008 15:56:57 -0000 Received: (qmail 15926 invoked by uid 9697); 4 Jun 2008 15:56:57 -0000 Date: Wed, 04 Jun 2008 15:56:00 -0000 Message-ID: <20080604155657.15911.qmail@sourceware.org> From: pmachata@sourceware.org To: frysk-cvs@sourceware.org Subject: [SCM] master: Support "[pid.tid#frameNum]symbol" syntax in lexer X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: d64a49dffa0bbf7e2e558f09c602b80b14f42063 X-Git-Newrev: a8f8edfe323dbfd4c4b23b8720dc8ec3ec959b3d Mailing-List: contact frysk-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: frysk-cvs-owner@sourceware.org Reply-To: frysk@sourceware.org X-SW-Source: 2008-q2/txt/msg00326.txt.bz2 The branch, master has been updated via a8f8edfe323dbfd4c4b23b8720dc8ec3ec959b3d (commit) via bf4ad659e1b4dc577f2d70c899258ab43c9d6e8b (commit) from d64a49dffa0bbf7e2e558f09c602b80b14f42063 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email. - Log ----------------------------------------------------------------- commit a8f8edfe323dbfd4c4b23b8720dc8ec3ec959b3d Author: Petr Machata Date: Wed Jun 4 17:44:41 2008 +0200 Support "[pid.tid#frameNum]symbol" syntax in lexer commit bf4ad659e1b4dc577f2d70c899258ab43c9d6e8b Author: Petr Machata Date: Tue Jun 3 16:46:20 2008 +0200 Disabled SourceBreakpoint doesn't remove its state from state map * ... instead it puts DISABLED state here. Thus when BreakpointManager calls refreshBreakpoints it doesn't re-enable once-disabled breakpoints. 
----------------------------------------------------------------------- Summary of changes: frysk-core/frysk/expr/CExpr.g | 385 ++++++++++++++++------------- frysk-core/frysk/expr/ChangeLog | 12 + frysk-core/frysk/expr/FQIdentToken.java | 8 +- frysk-core/frysk/expr/FQIdentifier.java | 13 + frysk-core/frysk/ftrace/TaskTracer.java | 15 -- frysk-core/frysk/rt/ChangeLog | 5 + frysk-core/frysk/rt/SourceBreakpoint.java | 2 +- 7 files changed, 256 insertions(+), 184 deletions(-) First 500 lines of diff: diff --git a/frysk-core/frysk/expr/CExpr.g b/frysk-core/frysk/expr/CExpr.g index cf8e654..587dca2 100644 --- a/frysk-core/frysk/expr/CExpr.g +++ b/frysk-core/frysk/expr/CExpr.g @@ -440,6 +440,206 @@ tokens super.match(s); } + private Token parseFQIdentifier() + throws RecognitionException, CharStreamException, TokenStreamException + { + + /* + * Funky HPD #-syntax doesn't map very well to LL-k type parser (for + * constant 'k'). When written directly, we get lots of lexical + * ambiguities. We work around that by doing arbitrary manual + * look-ahead and just parsing the tokens ourselves. Any whitespace + * or EOF stops the lookahead. + */ + + String matched = ""; + String part = ""; + + String partDso = null; + String partFile = null; + String partProc = null; + String partLine = null; + String partProcessId = null; + String partThreadId = null; + String partFrameNum = null; + + int i = 0; + char c; + + // Automaton state is composed of following sub-states: + final int FILE = 1; + final int LINE = 2; + final int SYMB = 4; + int allowed = LINE | SYMB; + + if ((c = fqLA(0)) == '#' + || c == '[') { + + char term = (c == '[') ? ']' : '#'; + String context = (c == '[') ? 
"dynamic context" : "DSO part"; + + matched += c; + i++; + while (true) { + c = fqLA(i++); + matched += c; + if (Character.isWhitespace(c) || c == EOF_CHAR) + throw new RecognitionException("Nonterminated " + context + + " `" + matched + + "' in fully qualified notation."); + else if (c == term) + break; + part += c; + } + + if (part.length() == 0) + throw new RecognitionException("Empty " + context + + " `" + matched + + "' in fully qualified notation."); + + if (term == ']') { + Matcher m = Pattern.compile("[0-9]+\\.[0-9]+#[0-9]+").matcher(part); + if (!m.matches()) + return null; + + int hash = part.indexOf('#'); + int dot = part.indexOf('.'); + partProcessId = part.substring(0, dot); + partThreadId = part.substring(dot + 1, hash); + partFrameNum = part.substring(hash + 1); + part = ""; + allowed = SYMB; + + } else { + partDso = part; + part = ""; + } + + } + + int state = allowed; + + loop: while(true) { + c = fqLA(i++); + if (Character.isWhitespace(c) || c == EOF_CHAR) + break; + + matched += c; + part += c; + switch (c) { + case '.': { + state |= FILE; + state &= ~SYMB; + break; + } + + case '#': { + if (partLine == null && partProc == null + && partProcessId == null) { + + if ((state & FILE) != 0 && partFile == null) + partFile = part.substring(0, part.length() - 1); + else if ((state & LINE) != 0) + partLine = part.substring(0, part.length() - 1); + else if ((state & SYMB) != 0) { + partProc = part.substring(0, part.length() - 1); + if (!Character.isJavaIdentifierStart(partProc.charAt(0))) + throw new RecognitionException("Procedure part (`" + partProc + "') in fully " + + "qualified notation has to be valid identifier."); + } else + // This # could belong to the next symbol. + // Break out and try to match the initial sequence. 
+ break loop; + } else + throw new RecognitionException("Unexpected `#' after line or proc name was defined."); + + state = allowed & SYMB; + if (partLine == null && partProc == null) + state |= allowed & LINE; + part = ""; + break; + } + + default: { + if (!(c >= '0' && c <= '9')) { + state &= ~LINE; + + if (!(Character.isJavaIdentifierStart(c) + || c == '@' + || (c == ':' && part.length() == 4 + && part.equals("plt:")))) { + + // Break out early if we are already + // just waiting for symbol. + if (partLine != null || partProc != null + || partProcessId != null) + break loop; + else + state &= ~SYMB; + } + } + } + } + } + + // ((state & SYMB) == 0) here means that we've parsed more + // than a symbol name, in hope it would turn out to be a + // file name (e.g. hello-world.c#symbol as a symbol + // reference vs. hello-world.c as an expression involving + // subtraction and struct access). In following, we take + // care not to consume anything that's not an identifier. + // E.g. when the user types "a+b", we want to match + // only identifier "a". + + boolean wantPlt = false; + if (part.startsWith("plt:")) { + wantPlt = true; + part = part.substring(4); + } + + int v = part.indexOf('@'); + String version = null; + if (v >= 0) { + version = part.substring(v + 1); + part = part.substring(0, v); + } + + // This is delibaretely simplified and ignores request for initial letter. + // This is for better error reporting below, we first snip off irrelevant + // parts before yelling at user that his identifier sucks. 
+ Matcher m = Pattern.compile("[a-zA-Z0-9_$]+").matcher(part); + if (m.lookingAt()) { + int diff = part.length() - m.end(); + if (diff > 0) { + matched = matched.substring(0, matched.length() - diff); + part = part.substring(0, m.end()); + } + } + else + throw new RecognitionException("Expected symbol name, got `" + part + "'."); + + if (!Character.isJavaIdentifierStart(part.charAt(0))) + throw new RecognitionException("Invalid symbol `" + part + "'."); + + FQIdentToken tok = new FQIdentToken(IDENT, matched); + tok.dso = partDso; + tok.file = partFile; + tok.line = partLine; + tok.proc = partProc; + tok.symbol = part; + tok.version = version; + tok.wantPlt = wantPlt; + tok.processId = partProcessId; + tok.threadId = partThreadId; + tok.frameNumber = partFrameNum; + tok.setLine(getLine()); + + fqmatch(matched); + tok.setColumn(getColumn() - matched.length()); + + return tok; + } + public static class FQIdentException extends RuntimeException { private static final long serialVersionUID = 1L; public FQIdentException(String s) { @@ -509,7 +709,16 @@ LCURLY : '{' ; LESSTHAN : "<" ; LESSTHANOREQUALTO : "<=" ; LPAREN : '(' ; -LSQUARE : '[' ; +LSQUARE : '[' (('0'..'9') { + fqinit = $getText; + try { + Token tok = parseFQIdentifier(); + if (tok != null) { + $setToken(tok); + $setType(IDENT); + } + } catch (RecognitionException exc) { } + } )? ; MINUS : '-' ; MINUSEQUAL : "-=" ; MINUSMINUS : "--" ; @@ -540,171 +749,12 @@ TIMESEQUAL : "*=" ; protected ELLIPSIS : "..." ; -/* - * Funky HPD #-syntax doesn't map very well to LL-k type parser (for - * constant 'k'). When written directly, we get lots of lexical - * ambiguities. We work around that by doing arbitrary manual - * look-ahead and just parsing the tokens ourselves. Any whitespace - * or EOF stops the lookahead. 
- */ - -protected -PARSE_FQIDENT - : { - String matched = ""; - String part = ""; - - String partDso = null; - String partFile = null; - String partProc = null; - String partLine = null; - - int i = 0; - char c; - if ((c = fqLA(0)) == '#') { - matched += c; - i++; - while (true) { - c = fqLA(i++); - matched += c; - if (Character.isWhitespace(c) || c == EOF_CHAR) - // This is a wack. - throw new RecognitionException("Nonterminated DSO part `" + matched - + "' in fully qualified notation."); - else if (c == '#') - break; - part += c; - } - if (part.length() == 0) - throw new RecognitionException("Empty DSO part `" + matched - + "' in fully qualified notation."); - partDso = part; - part = ""; - } - - // Automaton state is composed of following sub-states: - final int FILE = 1; - final int LINE = 2; - final int SYMB = 4; - int state = LINE | SYMB; - loop: while(true) { - c = fqLA(i++); - if (Character.isWhitespace(c) || c == EOF_CHAR) - break; - - matched += c; - part += c; - switch (c) { - case '.': { - state |= FILE; - state &= ~SYMB; - break; - } - - case '#': { - if (partLine == null && partProc == null) { - if ((state & FILE) != 0 && partFile == null) - partFile = part.substring(0, part.length() - 1); - else if ((state & LINE) != 0) - partLine = part.substring(0, part.length() - 1); - else if ((state & SYMB) != 0) { - partProc = part.substring(0, part.length() - 1); - if (!Character.isJavaIdentifierStart(partProc.charAt(0))) - throw new RecognitionException("Procedure part (`" + partProc + "') in fully " - + "qualified notation has to be valid identifier."); - } else - // This # could belong to the next symbol. - // Break out and try to match the initial sequence. 
- break loop; - } else - throw new RecognitionException("Unexpected `#' after line or proc name was defined."); - - state = SYMB; - if (partLine == null && partProc == null) - state |= LINE; - part = ""; - break; - } - - default: { - if (!(c >= '0' && c <= '9')) { - state &= ~LINE; - - if (!(Character.isJavaIdentifierStart(c) - || c == '@' - || (c == ':' && part.length() == 4 - && part.equals("plt:")))) { - - // Break out early if we are already - // just waiting for symbol. - if (partLine != null || partProc != null) - break loop; - else - state &= ~SYMB; - } - } - } - } - } - - // ((state & SYMB) == 0) here means that we've parsed more - // than a symbol name, in hope it would turn out to be a - // file name (e.g. hello-world.c#symbol as a symbol - // reference vs. hello-world.c as an expression involving - // subtraction and struct access). In following, we take - // care not to consume anything that's not an identifier. - // E.g. when the user types "a+b", we want to match - // only identifier "a". - - boolean wantPlt = false; - if (part.startsWith("plt:")) { - wantPlt = true; - part = part.substring(4); - } - - int v = part.indexOf('@'); - String version = null; - if (v >= 0) { - version = part.substring(v + 1); - part = part.substring(0, v); - } - - // This is delibaretely simplified and ignores request for initial letter. - // This is for better error reporting below, we first snip off irrelevant - // parts before yelling at user that his identifier sucks. 
- Matcher m = Pattern.compile("[a-zA-Z0-9_$]+").matcher(part); - if (m.lookingAt()) { - int diff = part.length() - m.end(); - if (diff > 0) { - matched = matched.substring(0, matched.length() - diff); - part = part.substring(0, m.end()); - } - } - else - throw new RecognitionException("Expected symbol name, got `" + part + "'."); - - if (!Character.isJavaIdentifierStart(part.charAt(0))) - throw new RecognitionException("Invalid symbol `" + part + "'."); - - FQIdentToken tok = new FQIdentToken(IDENT, matched); - tok.dso = partDso; - tok.file = partFile; - tok.line = partLine; - tok.proc = partProc; - tok.symbol = part; - tok.version = version; - tok.wantPlt = wantPlt; - tok.setLine(getLine()); - $setToken(tok); - - fqmatch(matched); - tok.setColumn(getColumn() - matched.length()); - } ; - protected IDENT - : ('$'|'#'|'a'..'z'|'A'..'Z'|'_') { fqinit = $getText; } - fqident:PARSE_FQIDENT { $setToken(fqident); } ; + : ('$'|'#'|'a'..'z'|'A'..'Z'|'_') { + fqinit = $getText; + $setToken(parseFQIdentifier()); + } ; /** * A token is returned not only on regular tabs @@ -854,8 +904,11 @@ NUM | ('0'..'7')+ {_ttype = OCTALINT;} )? | (('1'..'9') ('0'..'9')* {_ttype = DECIMALINT;}) - ( '#' {fqinit = $getText;} - fqident:PARSE_FQIDENT { $setType(IDENT); $setToken(fqident); } )? + ( '#' { + fqinit = $getText; + $setType(IDENT); + $setToken(parseFQIdentifier()); + } )? ) ( ('l'|'L') { _ttype = DECIMALINT; } diff --git a/frysk-core/frysk/expr/ChangeLog b/frysk-core/frysk/expr/ChangeLog index 5639fed..097137a 100644 --- a/frysk-core/frysk/expr/ChangeLog +++ b/frysk-core/frysk/expr/ChangeLog @@ -1,3 +1,15 @@ +2008-06-04 Petr Machata + + * FQIdentToken.java (processId, threadId, frameNumber): New fields. + * FQIdentifier.java: The same. + * CExpr.g: + (PARSE_FQIDENT): Move to the procedure of its own... + (parseFqIdent): ... that is here. + (LSQUARE): Dispatch to parseFqIdent, and if it succeeds, the token + was FQ identifier. If it doesn't succeed, just answer '['. 
+ (IDENT): Rewrite to call parseFqIdent. + (NUM): Likewise. + 2008-06-03 Teresa Thomas * Expression.java(getType): Use CTypeEvaluator.type(). diff --git a/frysk-core/frysk/expr/FQIdentToken.java b/frysk-core/frysk/expr/FQIdentToken.java index aaf3e6d..fdb5f42 100644 --- a/frysk-core/frysk/expr/FQIdentToken.java +++ b/frysk-core/frysk/expr/FQIdentToken.java @@ -43,7 +43,8 @@ public class FQIdentToken extends antlr.CommonToken { public String dso = null, file = null, line = null, - proc = null, symbol = null, version = null; + proc = null, symbol = null, version = null, + processId = null, threadId = null, frameNumber = null; public boolean wantPlt = false; public FQIdentToken(int t, String txt) { @@ -58,6 +59,9 @@ public class FQIdentToken + (proc != null ? ", proc:" + proc : "") + (wantPlt ? ", pltref" : "") + (symbol != null ? ", symbol:" + symbol : "") - + (version != null ? ", version:" + symbol : "") + "]"; + + (version != null ? ", version:" + symbol : "") + + (processId != null ? (", dynamic:" + processId + + "." 
+ threadId + "#" + frameNumber) : "") + + "]"; } } diff --git a/frysk-core/frysk/expr/FQIdentifier.java b/frysk-core/frysk/expr/FQIdentifier.java index ee197de..0859363 100644 --- a/frysk-core/frysk/expr/FQIdentifier.java +++ b/frysk-core/frysk/expr/FQIdentifier.java @@ -54,6 +54,9 @@ public class FQIdentifier { final public String symbol; final public String version; final public boolean wantPlt; + final public Long processId; + final public Long threadId; + final public Long frameNumber; final private int metasoname; final private static int soname_null = -1; @@ -70,6 +73,16 @@ public class FQIdentifier { this.version = tok.version; this.wantPlt = tok.wantPlt; + if (tok.processId != null) { + if (tok.threadId == null || tok.frameNumber == null) + throw new AssertionError("Either I need a pid, a tid, AND a " + + "frame number, or neither of them."); + this.processId = new Long(Long.parseLong(tok.processId, 10)); + this.threadId = new Long(Long.parseLong(tok.threadId, 10)); + this.frameNumber = new Long(Long.parseLong(tok.frameNumber, 10)); + } else + this.processId = this.threadId = this.frameNumber = null; + if (tok.line != null) this.line = new Long(Long.parseLong(tok.line, 10)); else diff --git a/frysk-core/frysk/ftrace/TaskTracer.java b/frysk-core/frysk/ftrace/TaskTracer.java index 467d3c1..0ae6b0f 100644 --- a/frysk-core/frysk/ftrace/TaskTracer.java +++ b/frysk-core/frysk/ftrace/TaskTracer.java @@ -228,7 +228,6 @@ class TaskTracer hooks/post-receive -- frysk system monitor/debugger