From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 13400 invoked by alias); 9 Oct 2007 10:27:16 -0000 Received: (qmail 13389 invoked by uid 22791); 9 Oct 2007 10:27:14 -0000 X-Spam-Status: No, hits=-2.0 required=5.0 tests=AWL,BAYES_00,DK_POLICY_SIGNSOME,FORGED_RCVD_HELO,TW_FH X-Spam-Check-By: sourceware.org Received: from wildebeest.demon.nl (HELO gnu.wildebeest.org) (83.160.170.119) by sourceware.org (qpsmtpd/0.31) with ESMTP; Tue, 09 Oct 2007 10:27:10 +0000 Received: from dijkstra.wildebeest.org ([192.168.1.29]) by gnu.wildebeest.org with esmtp (Exim 4.63) (envelope-from ) id 1IfCIn-0001mh-Rr for frysk@sourceware.org; Tue, 09 Oct 2007 12:27:06 +0200 Subject: multi-thread reset stepping bug fix From: Mark Wielaard To: frysk@sourceware.org Content-Type: multipart/mixed; boundary="=-i0lQhEK0ewAi/3YQkGvV" Date: Tue, 09 Oct 2007 10:27:00 -0000 Message-Id: <1191925621.3811.17.camel@dijkstra.wildebeest.org> Mime-Version: 1.0 X-Mailer: Evolution 2.8.3 (2.8.3-2.fc6) X-Spam-Score: -4.4 (----) X-Virus-Checked: Checked by ClamAV on sourceware.org X-IsSubscribed: yes Mailing-List: contact frysk-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: frysk-owner@sourceware.org X-SW-Source: 2007-q4/txt/msg00028.txt.bz2 --=-i0lQhEK0ewAi/3YQkGvV Content-Type: text/plain Content-Transfer-Encoding: 7bit Content-length: 2196 Hi, Tim found a bug and created a testcase for when two Tasks were unfortunate enough to be simultaneously stepping the same instruction using "reset stepping" at the exact same moment. http://sourceware.org/bugzilla/show_bug.cgi?id=5082 This fortunately triggered a sanity check or really unpredictable behavior would result. Unfortunately the sanity check was actually wrong. When reset-stepping an instruction we keep all threads running, temporarily remove a breakpoint instruction by putting the original instruction back, step over it and put the breakpoint instruction back. But if multiple Tasks are stepping the exact same instruction we should wait till all have stepped before putting the breakpoint instruction back. There is a related bug in the out-of-line stepping case which is tracked in http://sourceware.org/bugzilla/show_bug.cgi?id=5148 but for which there is no testcase yet. It is much harder to trigger since we almost never use out-of-line-stepping since we don't have a proper instruction parser yet (http://sourceware.org/bugzilla/show_bug.cgi?id=4762). frysk-core/frysk/proc/ChangeLog 2007-10-09 Mark Wielaard Fixes bug #5082 * Breakpoint.java (stepping): Removed, state is Task specific. (reset_stepping_tasks): New field. (prepareStep): Check Instruction properties not stepping. Adjust reset_stepping_tasks and only reset when zero. (stepDone): Likewise. (stepAbort): Likewise. frysk-core/frysk/hpd/ChangeLog 2007-10-09 Mark Wielaard * TestBreakpoints.java (testHpdBreakMultiThreadedContinue): Don't mark unresolved bug #5082. Expect Task terminating messages. This should fix all occurrences of IllegalStateException "Already stepping" that sometimes popped up. It does make the window of opportunity of missing a breakpoint a little bigger though for other running Tasks. Tim, I adjusted your testcase a little to catch the various Task X terminating/terminated statements. I wasn't sure there were just extra debug output or whether they should be explicitly matches. If I made the wrong assumption please adjust it to how the fhpd should report these. Cheers, Mark --=-i0lQhEK0ewAi/3YQkGvV Content-Disposition: inline; filename=breakpoint-reset.patch Content-Type: text/x-patch; name=breakpoint-reset.patch; charset=UTF-8 Content-Transfer-Encoding: 7bit Content-length: 7029 Index: frysk-core/frysk/hpd/TestBreakpoints.java =================================================================== RCS file: /cvs/frysk/frysk-core/frysk/hpd/TestBreakpoints.java,v retrieving revision 1.10 diff -u -r1.10 TestBreakpoints.java --- frysk-core/frysk/hpd/TestBreakpoints.java 1 Oct 2007 13:56:33 -0000 1.10 +++ frysk-core/frysk/hpd/TestBreakpoints.java 9 Oct 2007 10:14:14 -0000 @@ -158,8 +158,6 @@ } public void testHpdBreakMultiThreadedContinue() { - if (unresolved(5082)) - return; e = new HpdTestbed(); e.send ("run " + Config.getPkgLibFile("funit-fib-clone") + " 3\n\n"); e.expect (5, "Attached.*\n" + prompt); @@ -180,7 +178,16 @@ e.expect("where.*\\[0\\.0\\].*\\[0\\.1\\].*#0 0x[0-9a-f]+ in fib\\(.*\\[0\\.2\\].*#0 0x[0-9a-f]+ in fib\\(.*" + prompt); e.send("go\n"); - e.expect("go.*fib (3) = 2.*"); + e.expect("go.*" + prompt + "Task \\d+ is terminating"); + e.expect("(Breakpoint 0 fib.*){2}"); + e.send("go\n"); + e.expect("go.*" + prompt); + e.expect("Task \\d+ is terminating"); + e.expect("Task \\d+ is terminating"); + e.expect("Task \\d+ is terminating"); + e.expect("fib \\(3\\) = 2"); + e.expect("Task \\d+ is terminating"); + e.expect("Task \\d+ terminated"); e.send("quit\n"); e.expect("Quitting..."); e.close(); Index: frysk-core/frysk/proc/Breakpoint.java =================================================================== RCS file: /cvs/frysk/frysk-core/frysk/proc/Breakpoint.java,v retrieving revision 1.16 diff -u -r1.16 Breakpoint.java --- frysk-core/frysk/proc/Breakpoint.java 15 Aug 2007 11:05:18 -0000 1.16 +++ frysk-core/frysk/proc/Breakpoint.java 9 Oct 2007 10:14:14 -0000 @@ -56,15 +56,9 @@ private final long address; private final Proc proc; - // Different ways to setup a breakpoint for stepping. - private static final byte NOT_STEPPING = 0; - private static final byte OUT_OF_LINE_STEPPING = 1; - private static final byte SIMULATE_STEPPING = 2; - private static final byte RESET_STEPPING = 3; - - // Whether the breakpoint is setup for a step instruction. - // And if so how according to one of the above constants. - private byte stepping; + // Records how many tasks are reset stepping this Breakpoint + // currently. Only when zero can the breakpoint be put back. + private int reset_stepping_tasks; // Static cache of installed break points. private static HashMap installed = new HashMap(); @@ -74,6 +68,8 @@ private Instruction origInstruction; // The address to execute out of line if any. + // FIXME - this depends on the Task stepping and isn't an intrinsic + // property of Breakpoint. private long oo_address; /** @@ -121,8 +117,7 @@ */ public long getSetupAddress() { - if (stepping == NOT_STEPPING) - throw new IllegalStateException("Not currently stepping"); + // XXX - FIXME - This depends on the Task stepping. return oo_address != 0 ? oo_address : address; } @@ -168,9 +163,6 @@ */ public void remove(Task task) { - if (stepping != NOT_STEPPING) - throw new IllegalStateException("Currently stepping: " + this); - synchronized (installed) { if (! this.equals(installed.remove(this))) @@ -212,9 +204,6 @@ */ public void prepareStep(Task task) { - if (stepping != NOT_STEPPING) - throw new IllegalStateException("Already stepping"); - // We like out of line stepping above simulating the instruction // and if neither is possible we reset the instruction and risk // other Tasks missing the breakpoint (FIXME: the right way in @@ -224,7 +213,6 @@ // Proc will collect an address for our usage, our wait // till one if available. We need to return it to Proc // afterwards in stepDone(). - stepping = OUT_OF_LINE_STEPPING; oo_address = proc.getOutOfLineAddress(); origInstruction.setupExecuteOutOfLine(task, address, oo_address); } @@ -236,14 +224,17 @@ // stepDone(). Luckily no Instructions can actually simulate // themselves at this time. stepDone() will warn if it does // happen in the future. - stepping = SIMULATE_STEPPING; origInstruction.simulate(task); } else { - // WARNING, WILL ROBINSON! - stepping = RESET_STEPPING; - reset(task); + // WARNING, WILL ROBINSON! (Temporarily disable breakpoint) + // Record the number of concurrent stepping tasks. + // The breakpoint instruction can only be set back if all + // these tasks get passed it. + if (reset_stepping_tasks == 0) + reset(task); + reset_stepping_tasks++; } } @@ -256,9 +247,7 @@ { if (isInstalled()) { - if (stepping == NOT_STEPPING) - throw new IllegalStateException("Not stepping"); - else if (stepping == OUT_OF_LINE_STEPPING) + if (origInstruction.canExecuteOutOfLine()) { // Fixup any registers depending on the instruction being // at the original pc address. And let Proc know the address @@ -267,23 +256,23 @@ proc.doneOutOfLine(oo_address); oo_address = 0; } - else if (stepping == SIMULATE_STEPPING) + else if (origInstruction.canSimulate()) { // FIXME: See prepareStep(). System.err.println("Instruction simulation not finished! " + "Already stepped next instruction. Sorry."); } - else if (stepping == RESET_STEPPING) + else // Reset stepping { // Put the breakpoint instruction quickly back. - set(task); + // If all Tasks concurrently stepping it are done. + reset_stepping_tasks--; + if (reset_stepping_tasks == 0) + set(task); + else + return; } - else - throw new IllegalStateException("Impossible stepping state: " - + stepping); } - - stepping = NOT_STEPPING; } /** @@ -293,9 +282,7 @@ { if (isInstalled()) { - if (stepping == NOT_STEPPING) - throw new IllegalStateException("Not stepping"); - else if (stepping == OUT_OF_LINE_STEPPING) + if (origInstruction.canExecuteOutOfLine()) { // No step took place, so no fixup needed. Just but // breakpoint back and cleaer oo_address for Proc. @@ -303,23 +290,23 @@ proc.doneOutOfLine(oo_address); oo_address = 0; } - else if (stepping == SIMULATE_STEPPING) + else if (origInstruction.canSimulate()) { // FIXME: See prepareStep(). System.err.println("Instruction simulation not finished! " + "Already stepped next instruction. Sorry."); } - else if (stepping == RESET_STEPPING) + else // Reset stepping { // Put the breakpoint instruction quickly back. - set(task); + // If all Tasks concurrently stepping it are done. + reset_stepping_tasks--; + if (reset_stepping_tasks == 0) + set(task); + else + return; } - else - throw new IllegalStateException("Impossible stepping state: " - + stepping); } - - stepping = NOT_STEPPING; } /** --=-i0lQhEK0ewAi/3YQkGvV--