From mboxrd@z Thu Jan 1 00:00:00 1970 Content-Type: multipart/mixed; boundary="===============5039985819592309799==" MIME-Version: 1.0 From: Jan Kratochvil To: elfutils-devel@lists.fedorahosted.org Subject: Re: [PATCH] Add --core-pattern option to eu-stack Date: Wed, 26 Nov 2014 15:32:39 +0100 Message-ID: <20141126143239.GA16611@host2.jankratochvil.net> In-Reply-To: 1414706141.18323.39.camel@bordewijk.wildebeest.org --===============5039985819592309799== Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable On Thu, 30 Oct 2014 22:55:41 +0100, Mark Wielaard wrote: > On Thu, 2014-10-09 at 23:25 +0200, Jan Kratochvil wrote: > > + if (opt_core_pattern =3D=3D true && show_one_tid =3D=3D false) > > + argp_error (state, > > + N_("--core-pattern requires -1")); > = > Why this restriction? I was blindly following Oleg's note which was not so obvious to me, though: On Wed, 03 Sep 2014 16:26:41 +0200, Oleg Nesterov wrote: # Obviously, this way you can only inspect the thread which dumps the core. Therefore I have now tried to remove this limitation of "-1 Show the backtrace of only one thread". But I have found the other threads end up with: wait4(13902, [{WIFSIGNALED(s) && WTERMSIG(s) =3D=3D SIGSEGV && WCOREDUMP(s)}], __WALL, NULL) =3D 13902 Therefore they are dead at the core_pattern time, they cannot be ptraced and therefore they cannot be unwound. One could only find them in the core file itself but that is outside of the scope of this eu-stack feature. Jan diff --git a/src/stack.c b/src/stack.c index 59ae826..6a8a3ee 100644 --- a/src/stack.c +++ b/src/stack.c @@ -28,11 +28,16 @@ #include #include #include +#include #include ELFUTILS_HEADER(dwfl) = #include #include = +#ifndef PTRACE_EVENT_STOP +# define PTRACE_EVENT_STOP 128 +#endif + /* Name and version of program. 
*/ static void print_version (FILE *stream, struct argp_state *state); ARGP_PROGRAM_VERSION_HOOK_DEF =3D print_version; @@ -480,6 +485,74 @@ print_version (FILE *stream, struct argp_state *state = __attribute__ ((unused))) fprintf (stream, "stack (%s) %s\n", PACKAGE_NAME, PACKAGE_VERSION); } = +/* Attach thread TID without waiting for it. It must not be a crashed thr= ead + core_pattern has been called for. */ + +static void +core_pattern_seize_one (pid_t tid) +{ + if (ptrace (PTRACE_SEIZE, tid, NULL, NULL) !=3D 0) + error (EXIT_BAD, errno, "ptrace (PTRACE_SEIZE, %d)", tid); + if (ptrace (PTRACE_INTERRUPT, tid, NULL, NULL) !=3D 0) + error (EXIT_BAD, errno, "ptrace (PTRACE_INTERRUPT, %d)", tid); +} + +/* Wait for thread TID. It must not be a crashed thread core_pattern has = been + called for. */ + +static void +core_pattern_wait_for_one (pid_t tid) +{ + int status; + if (waitpid (tid, &status, __WALL) !=3D tid) + error (EXIT_BAD, errno, "waitpid (%d, __WALL)", tid); + if (! WIFSIGNALED (status)) + error (EXIT_BAD, 0, "waitpid (%d, __WALL) status 0x%x is not WIFSIGNAL= ED", + tid, status); +} + +/* Call CB for all threads of process EXCEPT_TID except tid EXCEPT_TID. + EXCEPT_TID does not have to be thread group leader. 
*/ + +static void +core_pattern_attach_other (pid_t except_tid, void (*cb) (pid_t tid)) +{ + char dirname[64]; + int i =3D snprintf (dirname, sizeof (dirname), "/proc/%d/task", except_t= id); + assert (i > 0 && i < (ssize_t) sizeof (dirname) - 1); + DIR *dir =3D opendir (dirname); + if (dir =3D=3D NULL) + error (EXIT_BAD, errno, "opendir (\"%s\")", dirname); + for (;;) + { + errno =3D 0; + struct dirent *dirent =3D readdir (dir); + if (dirent =3D=3D NULL) + { + if (errno !=3D 0) + error (EXIT_BAD, errno, "readdir (\"%s\")", dirname); + break; + } + if (strcmp (dirent->d_name, ".") =3D=3D 0 + || strcmp (dirent->d_name, "..") =3D=3D 0) + continue; + char *end; + errno =3D 0; + long tidl =3D strtol (dirent->d_name, &end, 10); + if (errno !=3D 0) + error (EXIT_BAD, errno, "strtol (\"%s\"->\"%s\")", dirname, + dirent->d_name); + pid_t tid =3D tidl; + if (tidl <=3D 0 || (end && *end) || tid !=3D tidl) + error (EXIT_BAD, 0, "Invalid TID (\"%s\"->\"%s\")", dirname, + dirent->d_name); + if (tid !=3D except_tid) + cb (tid); + } + if (closedir (dir) !=3D 0) + error (EXIT_BAD, errno, "closedir (\"%s\")", dirname); +} + /* Provide PTRACE_ATTACH like operation compatible with Linux core_pattern handler. */ = @@ -489,10 +562,12 @@ core_pattern (void) if (ptrace (PTRACE_SEIZE, pid, NULL, (void *) (uintptr_t) PTRACE_O_TRACE= EXIT) !=3D 0) error (EXIT_BAD, errno, "ptrace (PTRACE_SEIZE, PTRACE_O_TRACEEXIT)"); + if (! show_one_tid) + core_pattern_attach_other (pid, core_pattern_seize_one); if (close (0) !=3D 0) error (EXIT_BAD, errno, "close (0; core file fd)"); int status; - pid_t got =3D waitpid (pid, &status, 0); + pid_t got =3D waitpid (pid, &status, __WALL); if (got =3D=3D -1) error (EXIT_BAD, errno, "waitpid ()"); if (got !=3D pid) @@ -507,6 +582,8 @@ core_pattern (void) "waitpid () returned status 0x%x but (status >> 8)" " =3D=3D (SIGTRAP | (PTRACE_EVENT_EXIT << 8)) was expected", status); + if (! 
show_one_tid) + core_pattern_attach_other (pid, core_pattern_wait_for_one); } = static error_t @@ -610,10 +687,6 @@ parse_opt (int key, char *arg __attribute__ ((unused)), argp_error (state, N_("One of -p PID or --core COREFILE should be given.")); = - if (opt_core_pattern =3D=3D true && show_one_tid =3D=3D false) - argp_error (state, - N_("--core-pattern requires -1")); - if (pid !=3D 0) { dwfl =3D dwfl_begin (&proc_callbacks); --===============5039985819592309799==--