public inbox for frysk@sourceware.org
 help / color / mirror / Atom feed
From: Kris Van Hees <kris.van.hees@oracle.com>
To: frysk@sourceware.org
Subject: Patch for TearDownProcess
Date: Fri, 07 Sep 2007 02:22:00 -0000	[thread overview]
Message-ID: <20070907022100.GK22263@oracle.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1014 bytes --]

The attached patch has been commitedfor TearDownProcess, to handle various
cases of TestRunner hanging due to stray child process.  The bugzilla ticket
that handles this case is #4996.

The symptom was typically that a test execution started a child process, but
failed to register that process.  This left the process hanging, sometimes in
a utrace call, causing the next invocation of tearDown() to hang on the waitAll
request because the wait was remaining in effect due to a stuck process.

The attached patch *does* open the possibility that a stray process may remain
after tearDown() has been executed if e.g. a process termination causes another
process to be created, etc...  However, that will not cause a testsuite hang.
It is also not expected to pose a problem with the further execution of the
testsuite (in fact, it is very likely to get cleaned up during the tearDown()
of the next test).

The change has been tested multiple times to verify that it does not cause
any regressions.

	Cheers,
	Kris

[-- Attachment #2: FOO --]
[-- Type: text/plain, Size: 7289 bytes --]

--- TearDownProcess.java-orig	2007-09-06 17:37:05.000000000 -0400
+++ TearDownProcess.java	2007-09-06 20:07:51.000000000 -0400
@@ -1,5 +1,6 @@
 // This file is part of the program FRYSK.
 //
+// Copyright 2007 Oracle Corporation.
 // Copyright 2005, 2006, 2007, Red Hat Inc.
 //
 // FRYSK is free software; you can redistribute it and/or modify it
@@ -43,15 +44,16 @@
 import java.util.logging.Level;
 import java.util.Set;
 import java.util.HashSet;
+import frysk.junit.TestCase;
 import frysk.sys.Errno;
 import frysk.sys.Sig;
 import frysk.sys.Signal;
 import frysk.sys.Wait;
 import frysk.sys.proc.ProcBuilder;
 import frysk.sys.ProcessIdentifier;
-import java.util.Iterator;
 import frysk.sys.Ptrace;
 import frysk.sys.WaitBuilder;
+import frysk.sys.SignalBuilder;
 
 /**
  * Framework for cleaning up temporary processes created as part of a
@@ -160,7 +162,7 @@
      * continued. Work around this by first sending all tasks a
      * continue ...
      */
-    private static ProcessIdentifier capturedSendDetachContKill (ProcessIdentifier pid)
+    private static boolean capturedSendDetachContKill (ProcessIdentifier pid)
     {
 	// Do the detach
 	try {
@@ -181,11 +183,10 @@
 	    log("tkill -CONT", pid, "(failed - ESRCH)\n");
 	}
 	// Finally send it a kill to finish things off.
-	capturedSendTkill(pid);
-	return pid;
+	return capturedSendTkill(pid);
     }
 
-    private static ProcessIdentifier capturedSendDetachContKill (int pid)
+    private static boolean capturedSendDetachContKill (int pid)
     {
 	return capturedSendDetachContKill (new ProcessIdentifier (pid));
     }
@@ -195,9 +196,11 @@
 	// Make a preliminary pass through all the registered
 	// pidsToKillDuringTearDown trying to simply kill
 	// each. Someone else may have waited on their deaths already.
-	for (Iterator i = pidsToKillDuringTearDown.iterator(); i.hasNext();) {
-	    ProcessIdentifier child = (ProcessIdentifier) i.next();
-	    capturedSendTkill(child);
+	Object[] pidsToKill = pidsToKillDuringTearDown.toArray();
+	for (int i = 0; i < pidsToKill.length; i++) {
+	    ProcessIdentifier child = (ProcessIdentifier) pidsToKill[i];
+	    if (!capturedSendTkill(child))
+		pidsToKillDuringTearDown.remove(child);
 	}
 
 	// Go through all registered processes / tasks adding any of
@@ -214,16 +217,18 @@
 	// Iterate over a copy of the tids's collection as the
 	// missingTidsToKillDuringTearDown may modify the underlying
 	// collection.
-	Object[] pidsToKill = pidsToKillDuringTearDown.toArray();
+	pidsToKill = pidsToKillDuringTearDown.toArray();
 	for (int i = 0; i < pidsToKill.length; i++) {
 	    ProcessIdentifier child = (ProcessIdentifier) pidsToKill[i];
 	    missingTidsToKillDuringTearDown.construct(child);
 	}
 
 	// Blast all the processes for real.
-	for (Iterator i = pidsToKillDuringTearDown.iterator(); i.hasNext();) {
-	    ProcessIdentifier child = (ProcessIdentifier) i.next();
-	    capturedSendDetachContKill(child);
+	pidsToKill = pidsToKillDuringTearDown.toArray();
+	for (int i = 0; i < pidsToKill.length; i++) {
+	    ProcessIdentifier child = (ProcessIdentifier) pidsToKill[i];
+	    if (!capturedSendDetachContKill(child))
+		pidsToKillDuringTearDown.remove(child);
 	}
 
 	// Drain the wait event queue. This ensures that: there are
@@ -235,77 +240,77 @@
 	// For attached tasks, which will generate non-exit wait
 	// events (clone et.al.), the task is detached / killed.
 	// Doing that frees up the task so that it can run to exit.
-	try {
-	    while (! pidsToKillDuringTearDown.isEmpty()) {
-		log("waitAll -1 ....");
-		Wait.waitAll(-1, new WaitBuilder() {
-			public void cloneEvent (int pid, int clone)
-			{
-			    capturedSendDetachContKill(pid);
-			}
-
-			public void forkEvent (int pid, int child)
-			{
-			    capturedSendDetachContKill(pid);
-			}
-
-			public void exitEvent (int pid, boolean signal, int value,
-					       boolean coreDumped)
-			{
-			    capturedSendDetachContKill(pid);
-			    // Do not remove PID from
-			    // pidsToKillDuringTearDown list; need to
-			    // let the terminated event behind it
-			    // bubble up.
-			}
-
-			public void execEvent (int pid)
-			{
-			    capturedSendDetachContKill(pid);
-			}
-
-			public void syscallEvent (int pid)
-			{
-			    capturedSendDetachContKill(pid);
-			}
-
-			public void stopped (int pid, int signal)
-			{
-			    capturedSendDetachContKill(pid);
-			}
-
-			private void drainTerminated (int pid)
-			{
-			    // To be absolutly sure, again make
-			    // certain that the thread is detached.
-			    ProcessIdentifier id = capturedSendDetachContKill(pid);
-			    // True pidsToKillDuringTearDown can have
-			    // a second exit status behind this first
-			    // one, drain that also. Give up when
-			    // this PID has no outstanding events.
-			    log("Wait.drain", id, "\n");
-			    id.blockingDrain ();
-			    // Hopefully done with this PID.
-			    pidsToKillDuringTearDown.remove(id);
-			}
-
-			public void terminated (int pid, boolean signal,
-						int value,
-						boolean coreDumped)
-			{
-			    drainTerminated(pid);
-			}
-
-			public void disappeared (int pid, Throwable w)
-			{
-			    // The task vanished somehow, drain it.
-			    drainTerminated(pid);
-			}
-		    });
-	    }
-	}
-	catch (Errno.Echild e) {
-	    // No more events.
+	if (!pidsToKillDuringTearDown.isEmpty()) {
+	    log("waitAll " + pidsToKillDuringTearDown + " ....");
+	    Wait.waitAll(
+		TestCase.getTimeoutMilliseconds(),
+		new WaitBuilder() {
+		    public void cloneEvent (int pid, int clone)
+		    {
+			capturedSendDetachContKill(pid);
+		    }
+
+		    public void forkEvent (int pid, int child)
+		    {
+			capturedSendDetachContKill(pid);
+		    }
+
+		    public void exitEvent (int pid, boolean signal, int value,
+					   boolean coreDumped)
+		    {
+			capturedSendDetachContKill(pid);
+			// Do not remove PID from
+			// pidsToKillDuringTearDown list; need to
+			// let the terminated event behind it
+			// bubble up.
+		    }
+
+		    public void execEvent (int pid)
+		    {
+			capturedSendDetachContKill(pid);
+		    }
+
+		    public void syscallEvent (int pid)
+		    {
+			capturedSendDetachContKill(pid);
+		    }
+
+		    public void stopped (int pid, int signal)
+		    {
+			capturedSendDetachContKill(pid);
+		    }
+
+		    private void drainTerminated (int pid)
+		    {
+			// To be absolutly sure, again make
+			// certain that the thread is detached.
+			capturedSendDetachContKill(pid);
+			ProcessIdentifier id = new ProcessIdentifier (pid);
+			// True pidsToKillDuringTearDown can have
+			// a second exit status behind this first
+			// one, drain that also. Give up when
+			// this PID has no outstanding events.
+			log("Wait.drain", id, "\n");
+			id.blockingDrain ();
+			// Hopefully done with this PID.
+			pidsToKillDuringTearDown.remove(id);
+		    }
+
+		    public void terminated (int pid, boolean signal, int value,
+					    boolean coreDumped)
+		    {
+			drainTerminated(pid);
+		    }
+
+		    public void disappeared (int pid, Throwable w)
+		    {
+			// The task vanished somehow, drain it.
+			drainTerminated(pid);
+		    }
+		},
+		new SignalBuilder() {
+		    public void signal(Sig sig) {}
+		});
 	}
 
 	// Drain all the pending signals. Note that the process of killing

             reply	other threads:[~2007-09-07  2:22 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-09-07  2:22 Kris Van Hees [this message]
2007-09-07 13:31 ` Andrew Cagney
2007-09-07 14:30   ` Kris Van Hees
2007-09-07 18:39   ` Elena Zannoni
2007-09-13 12:34   ` Kris Van Hees

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070907022100.GK22263@oracle.com \
    --to=kris.van.hees@oracle.com \
    --cc=frysk@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).