public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [patch] Close fds before execute a cmd
@ 2007-10-12  0:41 Lai Jiangshan
  2007-10-12  0:46 ` Lai Jiangshan
  0 siblings, 1 reply; 5+ messages in thread
From: Lai Jiangshan @ 2007-10-12  0:41 UTC (permalink / raw)
  To: systemtap

Hi, all

	case 1:
stap -c 'sleep 100' -e 'probe begin{}'
# killing SIGUSR1 to "sleep 100" does not work as expected

	case 2:
trap "" SIGSEGV # set SIGSEGV's handler as SIG_IGN
stap -c 'sleep 100' -e 'probe begin{}'
# killing SIGSEGV still works.

	The bug reasons of case1 and case2 are same: the signal
handlers are not restored before exec().
	It's not a serious bug, but it's not friendly to users.
This bug can be fixed by the patch below(Restore signals except
SIGINT).
	There is also another good way: unblock all signals and
set signals' handlers as SIG_DFL **EXPLICITLY** before exec().

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>

diff -Nur src/runtime/staprun/common.c src.new/runtime/staprun/common.c
--- src/runtime/staprun/common.c	2007-08-20 23:02:47.000000000 +0900
+++ src.new/runtime/staprun/common.c	2007-10-12 09:05:21.000000000 +0900
@@ -251,6 +251,12 @@
 		_exit(1);
 }
 
+#ifdef SINGLE_THREADED
+#define sig_mask sigprocmask
+#else
+#define sig_mask pthread_sigmask
+#endif
+
 void setup_signals(void)
 {
 	sigset_t s;
@@ -258,11 +264,7 @@
 
 	/* blocking all signals while we set things up */
 	sigfillset(&s);
-#ifdef SINGLE_THREADED
-	sigprocmask(SIG_SETMASK, &s, NULL);
-#else
-	pthread_sigmask(SIG_SETMASK, &s, NULL);
-#endif
+	sig_mask(SIG_SETMASK, &s, NULL);
 	/* set some of them to be ignored */
 	memset(&a, 0, sizeof(a));
 	sigfillset(&a.sa_mask);
@@ -282,13 +284,41 @@
 	/* unblock all signals */
 	sigemptyset(&s);
 
-#ifdef SINGLE_THREADED
-	sigprocmask(SIG_SETMASK, &s, NULL);
-#else
-	pthread_sigmask(SIG_SETMASK, &s, NULL);
-#endif
+	sig_mask(SIG_SETMASK, &s, NULL);
+}
+
+static sigset_t saved_sigmask;
+static int saved_sig[] = {SIGTERM, SIGHUP, SIGQUIT,
+	SIGCHLD, SIGUSR1, SIGUSR2, SIGPIPE, SIGBUS,
+	SIGFPE, SIGILL, SIGSEGV, SIGXCPU, SIGXFSZ};
+static struct sigaction saved_sa[sizeof(saved_sig) / sizeof(saved_sig[0])]; 
+
+int save_signals(void)
+{
+	if(sig_mask(SIG_SETMASK, NULL, &saved_sigmask))
+		return -1;
+	int i = sizeof(saved_sig) / sizeof(saved_sig[0]);
+	while (i--) {
+		if (sigaction(saved_sig[i], NULL, &saved_sa[i]))
+			return -1;
+	}
+	return 0;
 }
 
+int restore_signals(void)
+{
+	if(sig_mask(SIG_SETMASK, &saved_sigmask, NULL))
+		return -1;
+	int i = sizeof(saved_sig) / sizeof(saved_sig[0]);
+	while (i--) {
+		if (sigaction(saved_sig[i], &saved_sa[i], NULL))
+			return -1;
+	}
+	return 0;
+}
+
+#undef sig_mask
+
 /**
  *	send_request - send request to kernel over control channel
  *	@type: the relay-app command id
diff -Nur src/runtime/staprun/mainloop.c src.new/runtime/staprun/mainloop.c
--- src/runtime/staprun/mainloop.c	2007-09-15 01:11:12.000000000 +0900
+++ src.new/runtime/staprun/mainloop.c	2007-10-10 16:04:40.000000000 +0900
@@ -90,6 +90,7 @@
 		
 		/* wait here until signaled */
 		sigwait(&usrset, &signum);
+		restore_signals();
 
 		if (execl("/bin/sh", "sh", "-c", target_cmd, NULL) < 0)
 			perror(target_cmd);
@@ -112,6 +113,7 @@
 		_perr("fork");
 	} else if (pid == 0) {
 		setpriority (PRIO_PROCESS, 0, 0);
+		restore_signals();
 		if (execl("/bin/sh", "sh", "-c", cmd, NULL) < 0)
 			perr("%s", cmd);
 		_exit(1);
diff -Nur src/runtime/staprun/stapio.c src.new/runtime/staprun/stapio.c
--- src/runtime/staprun/stapio.c	2007-08-15 00:23:59.000000000 +0900
+++ src.new/runtime/staprun/stapio.c	2007-10-10 16:03:28.000000000 +0900
@@ -26,6 +26,7 @@
 
 int main(int argc, char **argv)
 {
+	save_signals();
 	setup_signals();
 	
 	parse_args(argc, argv);
diff -Nur src/runtime/staprun/staprun.c src.new/runtime/staprun/staprun.c
--- src/runtime/staprun/staprun.c	2007-08-16 02:37:21.000000000 +0900
+++ src.new/runtime/staprun/staprun.c	2007-10-10 16:05:17.000000000 +0900
@@ -55,6 +55,8 @@
 			}
 		}
 
+		restore_signals();
+
 		/* Actually run the command. */
 		if (execv(path, argv) < 0)
 			perror(path);
@@ -149,6 +151,7 @@
 {
 	int rc;
 
+	save_signals();
 	if (atexit(exit_cleanup)) {
 		_perr("cannot set exit function");
 		exit(1);
diff -Nur src/runtime/staprun/staprun.h src.new/runtime/staprun/staprun.h
--- src/runtime/staprun/staprun.h	2007-08-15 00:23:59.000000000 +0900
+++ src.new/runtime/staprun/staprun.h	2007-10-10 16:00:38.000000000 +0900
@@ -151,6 +151,8 @@
 void usage(char *prog);
 void parse_modpath(const char *);
 void setup_signals(void);
+int save_signals(void);
+int restore_signals(void);
 
 /*
  * variables 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch] Close fds before execute a cmd
  2007-10-12  0:41 [patch] Close fds before execute a cmd Lai Jiangshan
@ 2007-10-12  0:46 ` Lai Jiangshan
  0 siblings, 0 replies; 5+ messages in thread
From: Lai Jiangshan @ 2007-10-12  0:46 UTC (permalink / raw)
  To: systemtap

Sorry to make a mistake about the title, it should be
"[patch] Restore signals before execute a cmd".

Lai Jiangshan wrote:
> Hi, all
> 
>     case 1:
> stap -c 'sleep 100' -e 'probe begin{}'
> # killing SIGUSR1 to "sleep 100" does not work as expected
> 
>     case 2:
> trap "" SIGSEGV # set SIGSEGV's handler as SIG_IGN
> stap -c 'sleep 100' -e 'probe begin{}'
> # killing SIGSEGV still works.
> 
>     The bug reasons of case1 and case2 are same: the signal
> handlers are not restored before exec().
>     It's not a serious bug, but it's not friendly to users.
> This bug can be fixed by the patch below(Restore signals except
> SIGINT).
>     There is also another good way: unblock all signals and
> set signals' handlers as SIG_DFL **EXPLICITLY** before exec().
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [patch] Close fds before execute a cmd
  2007-10-09  5:08 Lai Jiangshan
  2007-10-09  5:37 ` Stone, Joshua I
@ 2007-10-09 16:12 ` Martin Hunt
  1 sibling, 0 replies; 5+ messages in thread
From: Martin Hunt @ 2007-10-09 16:12 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: systemtap

On Tue, 2007-10-09 at 14:10 +0900, Lai Jiangshan wrote:
[...]
> 	I traced this bug and at the last I found the reason: stapio
> did not close some file descriptors before using execl() to run a cmd.
> The processes of this cmd may read or write on the fds inherited from
> stapio. Especially the fd of control_channel, if one of processes of
> this cmd reads and writes on this fd, it communicates with stap_XXXXXX.ko,
> and lots of BUGs occurred. (Does stap_XXXXXX.ko support communicating
> with two or more processes at the same time?)

Thank you for reporting this bug.  I have checked in a patch to set the
FD_CLOEXEC flags for all files stapio opens. 

Martin

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [patch] Close fds before execute a cmd
  2007-10-09  5:08 Lai Jiangshan
@ 2007-10-09  5:37 ` Stone, Joshua I
  2007-10-09 16:12 ` Martin Hunt
  1 sibling, 0 replies; 5+ messages in thread
From: Stone, Joshua I @ 2007-10-09  5:37 UTC (permalink / raw)
  To: Lai Jiangshan, systemtap

Lai Jiangshan wrote:
> 	I traced this bug and at the last I found the reason: stapio did
not
> close some file descriptors before using execl() to run a cmd. The
> processes of this cmd may read or write on the fds inherited from
> stapio. Especially the fd of control_channel, if one of processes of
> this cmd reads and writes on this fd, it communicates with
> stap_XXXXXX.ko, and lots of BUGs occurred. (Does stap_XXXXXX.ko
> support communicating with two or more processes at the same time?)

It would be strange for an app to do anything on an already open fd, but
not unheard of, I suppose. 

Rather than brute-force closing all fd > 2, a better way might be to set
FD_CLOEXEC in the appropriate places.


Josh

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [patch] Close fds before execute a cmd
@ 2007-10-09  5:08 Lai Jiangshan
  2007-10-09  5:37 ` Stone, Joshua I
  2007-10-09 16:12 ` Martin Hunt
  0 siblings, 2 replies; 5+ messages in thread
From: Lai Jiangshan @ 2007-10-09  5:08 UTC (permalink / raw)
  To: systemtap

Hi, all!

	When I test systemtap, I found that sometimes,
such bug occurred:
1)stap will not exit.(^C does not work)
2)staprun will be "Uninterruptible sleep" on syscall delete_module.
3)stapio will receive SIGCHLDs and exits successfully.(it's proper)
4)stap_XXXXXX.ko cannot be unloaded by command rmmod.
(and I found it is the bug#3718)

	There are two ways to reproduce this bug:
1)use option -c "/etc/init.d/nfs start" with any stap script.
2)use stapfunc system("/etc/init.d/nfs start") in script.
(This bug is easily reproduced by these ways on the platforms
that I have, fc6 on i386 and rhel5_ga on ia64.)

	I traced this bug and at the last I found the reason: stapio
did not close some file descriptors before using execl() to run a cmd.
The processes of this cmd may read or write on the fds inherited from
stapio. Especially the fd of control_channel, if one of processes of
this cmd reads and writes on this fd, it communicates with stap_XXXXXX.ko,
and lots of BUGs occurred. (Does stap_XXXXXX.ko support communicating
with two or more processes at the same time?)

This is a example, ill_req.c:
----------------------------------------------
#include <unistd.h>
#include <string.h>
#define ILL_REQ 1 //STP_EXIT
int main(int argc, char *argv[])
{
	int i;
	int req_type = ILL_REQ;
	char buf[1024];
	memcpy(buf, &req_type, 4);
	// guest control_channel and write
	for (i = 3; i < 1024; i++)
		write(i, buf, 4);
	return 0;
}
----------------------------------------------

$ stap -e 'probe begin{system("./ill_req")}'
	I has not terminated it and there is no exit() in script,
but it exit quickly!

This bug can be fixed by the following patch:

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>

--- src/runtime/staprun/mainloop.c	2007-09-15 01:11:12.000000000 +0900
+++ src.new/runtime/staprun/mainloop.c	2007-10-09 13:49:24.000000000 +0900
@@ -48,6 +48,30 @@
 	sigaction(SIGQUIT, &a, NULL);
 }
 
+/* close all fds except std fds */
+static int close_all(void)
+{
+	struct dirent *d;
+	DIR *fds_dir = opendir("/proc/self/fd");
+	if (fds_dir == NULL)
+		return -1;
+
+	while ((d = readdir(fds_dir)) != NULL) {
+		char *endptr;
+		long fd = strtol(d->d_name, &endptr, 10);
+		if (d->d_name == endptr || *endptr != '\0')
+			continue; /* skip, it's not an integer */
+		if (fd < 0 || (unsigned long)fd >= (unsigned long)INT_MAX)
+			continue; /* skip, it's not a nonnegative-int */
+		if (fd != STDIN_FILENO && fd != STDOUT_FILENO
+			&& fd != STDERR_FILENO && fd != dirfd(fds_dir))
+		{
+			close((int) fd);
+		}
+	}
+	closedir(fds_dir);
+	return 0;
+}
 
 /* 
  * start_cmd forks the command given on the command line
@@ -87,6 +111,9 @@
 
 		/* commands we fork need to run at normal priority */
 		setpriority (PRIO_PROCESS, 0, 0);
+
+		if (close_all())
+			_exit(1);
 		
 		/* wait here until signaled */
 		sigwait(&usrset, &signum);
@@ -112,6 +139,8 @@
 		_perr("fork");
 	} else if (pid == 0) {
 		setpriority (PRIO_PROCESS, 0, 0);
+		if (close_all())
+			_exit(1);
 		if (execl("/bin/sh", "sh", "-c", cmd, NULL) < 0)
 			perr("%s", cmd);
 		_exit(1);



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-10-12  0:46 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-10-12  0:41 [patch] Close fds before execute a cmd Lai Jiangshan
2007-10-12  0:46 ` Lai Jiangshan
  -- strict thread matches above, loose matches on Subject: below --
2007-10-09  5:08 Lai Jiangshan
2007-10-09  5:37 ` Stone, Joshua I
2007-10-09 16:12 ` Martin Hunt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).