public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
       [not found] <4B128ECF.9020906@redhat.com>
@ 2009-12-02 20:42 ` Masami Hiramatsu
  2009-12-03 10:40   ` Ingo Molnar
  2009-12-05  7:18   ` KOSAKI Motohiro
  0 siblings, 2 replies; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-02 20:42 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, limits, pointer to file structure and core
file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   23 ++++++++++++----------
 include/trace/events/signal.h |   43 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 2ec6973..be3ec5c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1772,6 +1773,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1837,9 +1839,6 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
-		goto fail_unlock;
-
  	if (ispipe) {
 		if (cprm.limit == 0) {
 			/*
@@ -1860,7 +1859,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 				"Process %d(%s) has RLIMIT_CORE set to 0\n",
 				task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Aborting core\n");
-			goto fail_unlock;
+			goto end_open;
 		}
 
 		dump_count = atomic_inc_return(&core_dump_count);
@@ -1868,14 +1867,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
-			goto fail_dropcount;
+			goto end_open;
 		}
 
 		cprm.limit = RLIM_INFINITY;
@@ -1883,15 +1882,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		/* SIGPIPE can happen, but it's just never processed */
 		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
 				&cprm.file)) {
+			cprm.file = NULL;
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
-			goto fail_dropcount;
+			goto end_open;
  		}
- 	} else
+	} else if (cprm.limit >= binfmt->min_coredump)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+
+end_open:
+	trace_signal_coredump(&cprm, corename);
+
+	if (!cprm.file || IS_ERR(cprm.file))
 		goto fail_dropcount;
 	inode = cprm.file->f_path.dentry->d_inode;
 	if (inode->i_nlink > 1)
@@ -1928,7 +1932,6 @@ close_fail:
 fail_dropcount:
 	if (dump_count)
 		atomic_dec(&core_dump_count);
-fail_unlock:
 	if (helper_argv)
 		argv_free(helper_argv);
 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..7feba6e 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -6,6 +6,7 @@
 
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +168,48 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'cprm->file' is a pointer to file structure of core file, if it is NULL
+ * or an error number(IS_ERR(cprm->file)), coredump should be failed.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name),
+
+	TP_ARGS(cprm, core_name),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	limit		)
+		__field(	unsigned long,	flags		)
+		__field(	unsigned long,	file		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig	= (int)cprm->signr;
+		__entry->limit	= cprm->limit;
+		__entry->flags	= cprm->mm_flags;
+		__entry->file	= (unsigned long)cprm->file;
+		__assign_str(name,	core_name);
+	),
+
+	TP_printk("sig=%d limit=%lu dumpable=%lx dump_filter=%lx "
+		  "file=%lx corename=%s",
+		  __entry->sig, __entry->limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __entry->file, __get_str(name))
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-02 20:42 ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
@ 2009-12-03 10:40   ` Ingo Molnar
  2009-12-03 11:30     ` Masami Hiramatsu
  2009-12-05  7:18   ` KOSAKI Motohiro
  1 sibling, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 2009-12-03 10:40 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Add signal coredump tracepoint which shows signal number, mm->flags, 
> limits, pointer to file structure and core file name.

Why is the kernel pointer to the file structure logged? User-space has 
no use for it and the analysis value is low.

	Ingo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-03 10:40   ` Ingo Molnar
@ 2009-12-03 11:30     ` Masami Hiramatsu
  2009-12-05  7:17       ` Ingo Molnar
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-03 11:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro

Ingo Molnar wrote:
> 
> * Masami Hiramatsu <mhiramat@redhat.com> wrote:
> 
>> Add signal coredump tracepoint which shows signal number, mm->flags, 
>> limits, pointer to file structure and core file name.
> 
> Why is the kernel pointer to the file structure logged? User-space has 
> no use for it and the analysis value is low.

Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO,
so we can check if there is an error.

Perhaps, we can do below in trace_printk for trace users.
"open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-03 11:30     ` Masami Hiramatsu
@ 2009-12-05  7:17       ` Ingo Molnar
  2009-12-07 17:19         ` Masami Hiramatsu
  0 siblings, 1 reply; 14+ messages in thread
From: Ingo Molnar @ 2009-12-05  7:17 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro


* Masami Hiramatsu <mhiramat@redhat.com> wrote:

> Ingo Molnar wrote:
> > 
> > * Masami Hiramatsu <mhiramat@redhat.com> wrote:
> > 
> >> Add signal coredump tracepoint which shows signal number, mm->flags, 
> >> limits, pointer to file structure and core file name.
> > 
> > Why is the kernel pointer to the file structure logged? User-space has 
> > no use for it and the analysis value is low.
> 
> Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO, so we can 
> check if there is an error.

ok, that wasnt obvious from the patch - worth adding it to the 
changelog.

> Perhaps, we can do below in trace_printk for trace users.
> "open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"

i'd rather suggest to pass an error code (and keep it 0 if none), 
instead of some ad-hoc string message.

But ... the whole issue of VFS event logging and new tracepoints should 
be approached from a more generic direction i think. Do we want to log 
inode_nr:dev pairs as well? Shouldnt there be a generic event-class 
definition via DECLARE_EVENT_CLASS for file related events, with 'core 
dumped' just being a sub-event-code?

I sense reluctance from the direction of Andrew and disinterest from the 
VFS folks - not a good backdrop in general.

	Ingo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-02 20:42 ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
  2009-12-03 10:40   ` Ingo Molnar
@ 2009-12-05  7:18   ` KOSAKI Motohiro
  2009-12-07 15:25     ` Masami Hiramatsu
  1 sibling, 1 reply; 14+ messages in thread
From: KOSAKI Motohiro @ 2009-12-05  7:18 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

2009/12/3 Masami Hiramatsu <mhiramat@redhat.com>:
> Add signal coredump tracepoint which shows signal number,
> mm->flags, limits, pointer to file structure and core
> file name.
>
> This tracepoint requirement comes mainly from the viewpoint of
> administrators. Since now we have introduced many coredump
> configurations (e.g. dumpable, coredump_filter, core_pattern,
> etc) and some of them can be modified by users, it will be hard
> to know what was actually dumped (or not dumped) after some
> problem happened on the system. For example, a process didn't
> generated core, coredump doesn't have some sections, etc.
> In those cases, the coredump tracepoint can help us to know
> why the core file is so big or small, or not generated, by
> recording all configurations for all processes on the system.
> That will reduce system-administration cost.

AFAIK, not-dumped case is important than dump successful case.
IOW, admin need to know why the crashed process was not dumped.

This tracepoint doesn't cover all failure case. especially
binfmt->core_dump() et.al.
IOW, this tracepoint seems too specialized piped-coredump feature.

What do you think this tracepoint's use case?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-05  7:18   ` KOSAKI Motohiro
@ 2009-12-07 15:25     ` Masami Hiramatsu
  2009-12-08  1:51       ` KOSAKI Motohiro
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-07 15:25 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

KOSAKI Motohiro wrote:
> 2009/12/3 Masami Hiramatsu<mhiramat@redhat.com>:
>> Add signal coredump tracepoint which shows signal number,
>> mm->flags, limits, pointer to file structure and core
>> file name.
>>
>> This tracepoint requirement comes mainly from the viewpoint of
>> administrators. Since now we have introduced many coredump
>> configurations (e.g. dumpable, coredump_filter, core_pattern,
>> etc) and some of them can be modified by users, it will be hard
>> to know what was actually dumped (or not dumped) after some
>> problem happened on the system. For example, a process didn't
>> generated core, coredump doesn't have some sections, etc.
>> In those cases, the coredump tracepoint can help us to know
>> why the core file is so big or small, or not generated, by
>> recording all configurations for all processes on the system.
>> That will reduce system-administration cost.
>
> AFAIK, not-dumped case is important than dump successful case.
> IOW, admin need to know why the crashed process was not dumped.

Certainly, failure cases are important, but also, the cases
that dumped-core doesn't or does include some sections
are also important.

> This tracepoint doesn't cover all failure case. especially
> binfmt->core_dump() et.al.
> IOW, this tracepoint seems too specialized piped-coredump feature.

Hmm, so would you mean that after calling binfmt->core_dump()
is better place?

> What do you think this tracepoint's use case?

Frankly to say, our first attempt was tracing mm->flags because
it can be changed by users without asking, and they sometimes
ask why core is not perfect or why core file is so big.

Perhaps, covering all of those failure cases and succeed cases,
gives better information for them. In that case, we might better
tweak execution(goto) path to leave some error code on retval.

e.g.
         if (IS_ERR(file))
                 goto fail_dropcount;
+	retval = -EBADF;
         inode = file->f_path.dentry->d_inode;
         if (inode->i_nlink > 1)
                 goto close_fail;        /* multiple links - don't dump */
         if (!ispipe && d_unhashed(file->f_path.dentry))
                 goto close_fail;

         /* AK: actually i see no reason to not allow this for named pipes etc.,
            but keep the previous behaviour for now. */
         if (!ispipe && !S_ISREG(inode->i_mode))
                 goto close_fail;
         /*
          * Dont allow local users get cute and trick others to coredump
          * into their pre-created files:
          */
+	retval = -EPERM;
         if (inode->i_uid != current_fsuid())
                 goto close_fail;
+	retval = -EINVAL;
         if (!file->f_op)
                 goto close_fail;
         if (!file->f_op->write)
                 goto close_fail;
+	retval = -EEXIST;
         if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
                 goto close_fail;


Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-05  7:17       ` Ingo Molnar
@ 2009-12-07 17:19         ` Masami Hiramatsu
  0 siblings, 0 replies; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-07 17:19 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron, KOSAKI Motohiro

Ingo Molnar wrote:
>
> * Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>
>> Ingo Molnar wrote:
>>>
>>> * Masami Hiramatsu<mhiramat@redhat.com>  wrote:
>>>
>>>> Add signal coredump tracepoint which shows signal number, mm->flags,
>>>> limits, pointer to file structure and core file name.
>>>
>>> Why is the kernel pointer to the file structure logged? User-space has
>>> no use for it and the analysis value is low.
>>
>> Ah, if open() or opening pipe fails, it becomes 0 or -ERRNO, so we can
>> check if there is an error.
>
> ok, that wasnt obvious from the patch - worth adding it to the
> changelog.

OK.

>> Perhaps, we can do below in trace_printk for trace users.
>> "open %s", (!file || IS_ERR((void *)file)) ? "failed" : "succeeded"
>
> i'd rather suggest to pass an error code (and keep it 0 if none),
> instead of some ad-hoc string message.

Sure. Or, perhaps, is it enough to show error code? (as block_rq_with_error did)

> But ... the whole issue of VFS event logging and new tracepoints should
> be approached from a more generic direction i think. Do we want to log
> inode_nr:dev pairs as well? Shouldnt there be a generic event-class
> definition via DECLARE_EVENT_CLASS for file related events, with 'core
> dumped' just being a sub-event-code?

Hmm, would you mean that coredump event should be a VFS event? or
handling file open errors should be a VFS event?
There are many other special reasons of failing coredump, as I discussed
with Kosaki-san. So, I think coredump event should be different from VFS
events.

Of course, Failure of file opening event should be handled in VFS events
if possible. In that case, we just need to trace coredump event and
VFS open event, and matching file descriptor or something like that.

> I sense reluctance from the direction of Andrew and disinterest from the
> VFS folks - not a good backdrop in general.
>
> 	Ingo

Thank you,

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-07 15:25     ` Masami Hiramatsu
@ 2009-12-08  1:51       ` KOSAKI Motohiro
  2009-12-08 20:35         ` [PATCH v3] " Masami Hiramatsu
  0 siblings, 1 reply; 14+ messages in thread
From: KOSAKI Motohiro @ 2009-12-08  1:51 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> KOSAKI Motohiro wrote:
> > 2009/12/3 Masami Hiramatsu<mhiramat@redhat.com>:
> >> Add signal coredump tracepoint which shows signal number,
> >> mm->flags, limits, pointer to file structure and core
> >> file name.
> >>
> >> This tracepoint requirement comes mainly from the viewpoint of
> >> administrators. Since now we have introduced many coredump
> >> configurations (e.g. dumpable, coredump_filter, core_pattern,
> >> etc) and some of them can be modified by users, it will be hard
> >> to know what was actually dumped (or not dumped) after some
> >> problem happened on the system. For example, a process didn't
> >> generated core, coredump doesn't have some sections, etc.
> >> In those cases, the coredump tracepoint can help us to know
> >> why the core file is so big or small, or not generated, by
> >> recording all configurations for all processes on the system.
> >> That will reduce system-administration cost.
> >
> > AFAIK, not-dumped case is important than dump successful case.
> > IOW, admin need to know why the crashed process was not dumped.
> 
> Certainly, failure cases are important, but also, the cases
> that dumped-core doesn't or does include some sections
> are also important.

correct.

> > This tracepoint doesn't cover all failure case. especially
> > binfmt->core_dump() et.al.
> > IOW, this tracepoint seems too specialized piped-coredump feature.
> 
> Hmm, so would you mean that after calling binfmt->core_dump()
> is better place?

I think your following use-case desired so. if you have unwritten reason, please correct me.

	For example, a process didn't generated core, coredump doesn't have
	some sections, etc.


> > What do you think this tracepoint's use case?
> 
> Frankly to say, our first attempt was tracing mm->flags because
> it can be changed by users without asking, and they sometimes
> ask why core is not perfect or why core file is so big.
> 
> Perhaps, covering all of those failure cases and succeed cases,
> gives better information for them. In that case, we might better
> tweak execution(goto) path to leave some error code on retval.

This is enough acceptable to me.

> 
> e.g.
>          if (IS_ERR(file))
>                  goto fail_dropcount;
> +	retval = -EBADF;
>          inode = file->f_path.dentry->d_inode;
>          if (inode->i_nlink > 1)
>                  goto close_fail;        /* multiple links - don't dump */
>          if (!ispipe && d_unhashed(file->f_path.dentry))
>                  goto close_fail;
> 
>          /* AK: actually i see no reason to not allow this for named pipes etc.,
>             but keep the previous behaviour for now. */
>          if (!ispipe && !S_ISREG(inode->i_mode))
>                  goto close_fail;
>          /*
>           * Dont allow local users get cute and trick others to coredump
>           * into their pre-created files:
>           */
> +	retval = -EPERM;
>          if (inode->i_uid != current_fsuid())
>                  goto close_fail;
> +	retval = -EINVAL;
>          if (!file->f_op)
>                  goto close_fail;
>          if (!file->f_op->write)
>                  goto close_fail;
> +	retval = -EEXIST;
>          if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
>                  goto close_fail;
> 
> 
> Thank you,
> 
> -- 
> Masami Hiramatsu
> 
> Software Engineer
> Hitachi Computer Products (America), Inc.
> Software Solutions Division
> 
> e-mail: mhiramat@redhat.com
> 



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-08  1:51       ` KOSAKI Motohiro
@ 2009-12-08 20:35         ` Masami Hiramatsu
  2009-12-09  5:34           ` KOSAKI Motohiro
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-08 20:35 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml, KOSAKI Motohiro
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, limits, the result of coredump, and core
file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v3:
 - Move tracepoint at the end of do_coredump() for tracing
   the result of coredump.
 - Use retval to record error-code at every failure points
   for passing the result of coredump to tracepoint.
 - Trace retval instead of cprm->file for recording the
   result of coredump.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   58 ++++++++++++++++++++++++++++++-----------
 include/trace/events/signal.h |   48 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0a5d944..d67ed5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1753,7 +1754,7 @@ static void wait_for_dump_helpers(struct file *file)
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
-	char corename[CORENAME_MAX_SIZE + 1];
+	char corename[CORENAME_MAX_SIZE + 1] = "";
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
@@ -1768,6 +1769,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1781,8 +1783,10 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	audit_core_dumps(signr);
 
 	binfmt = mm->binfmt;
-	if (!binfmt || !binfmt->core_dump)
+	if (!binfmt || !binfmt->core_dump) {
+		retval = -ENOSYS;
 		goto fail;
+	}
 
 	cred = prepare_creds();
 	if (!cred) {
@@ -1795,6 +1799,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
 	if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
+		/* This is not an error. retval should be 0 */
 		up_write(&mm->mmap_sem);
 		put_cred(cred);
 		goto fail;
@@ -1833,11 +1838,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) {
+		retval = -EFBIG;
 		goto fail_unlock;
+	}
 
  	if (ispipe) {
 		if (cprm.limit == 0) {
+			retval = -EINVAL;
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
@@ -1861,6 +1869,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		dump_count = atomic_inc_return(&core_dump_count);
 		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+			retval = -EFBIG;
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
@@ -1869,6 +1878,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
+			retval = -ENOMEM;
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
 			goto fail_dropcount;
@@ -1877,8 +1887,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
-		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&cprm.file)) {
+		retval = call_usermodehelper_pipe(helper_argv[0], helper_argv,
+						  NULL, &cprm.file);
+		if (retval < 0) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
@@ -1887,31 +1898,43 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+	if (IS_ERR(cprm.file)) {
+		retval = (int)PTR_ERR(cprm.file);
 		goto fail_dropcount;
+	}
 	inode = cprm.file->f_path.dentry->d_inode;
-	if (inode->i_nlink > 1)
+	if (inode->i_nlink > 1) {
+		retval = -EMLINK;
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
+	}
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
 	   but keep the previous behaviour for now. */
-	if (!ispipe && !S_ISREG(inode->i_mode))
+	if (!ispipe && !S_ISREG(inode->i_mode)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 	/*
 	 * Dont allow local users get cute and trick others to coredump
 	 * into their pre-created files:
 	 */
-	if (inode->i_uid != current_fsuid())
+	if (inode->i_uid != current_fsuid()) {
+		retval = -EPERM;
 		goto close_fail;
-	if (!cprm.file->f_op)
-		goto close_fail;
-	if (!cprm.file->f_op->write)
-		goto close_fail;
-	if (!ispipe &&
-	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
+	}
+	if (!cprm.file->f_op || !cprm.file->f_op->write) {
+		retval = -EINVAL;
 		goto close_fail;
+	}
+	if (!ispipe) {
+		retval = do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file);
+		if (retval != 0)
+			goto close_fail;
+	}
 
 	retval = binfmt->core_dump(&cprm);
 
@@ -1932,5 +1955,8 @@ fail_unlock:
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
+	/* Trace coredump parameters and return value */
+	trace_signal_coredump(&cprm, corename, retval);
+
 	return;
 }
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..42608f0 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -4,8 +4,10 @@
 #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SIGNAL_H
 
+#include <linux/err.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +169,52 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ * @retval: return value of binfmt->coredump or error-code
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'retval' is an error code or 0/1. retval == 1 means the core file was
+ * dumped successfully and retval == 0 means binfmt->coredump failed to dump.
+ * If retval < 0, this means do_coredump() failed to dump core file before
+ * calling binfmt->coredump.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name,
+		 int retval),
+
+	TP_ARGS(cprm, core_name, retval),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	limit		)
+		__field(	unsigned long,	flags		)
+		__field(	int,		retval		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig	= (int)cprm->signr;
+		__entry->limit	= cprm->limit;
+		__entry->flags	= cprm->mm_flags;
+		__entry->retval	= retval;
+		__assign_str(name,	core_name);
+	),
+
+	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
+		  "corename=\"%s\" retval=%d",
+		  __entry->sig, __entry->limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __get_str(name), __entry->retval)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-08 20:35         ` [PATCH v3] " Masami Hiramatsu
@ 2009-12-09  5:34           ` KOSAKI Motohiro
  2009-12-09 16:07             ` Masami Hiramatsu
  0 siblings, 1 reply; 14+ messages in thread
From: KOSAKI Motohiro @ 2009-12-09  5:34 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> +	TP_fast_assign(
> +		__entry->sig	= (int)cprm->signr;
> +		__entry->limit	= cprm->limit;
> +		__entry->flags	= cprm->mm_flags;
> +		__entry->retval	= retval;
> +		__assign_str(name,	core_name);
> +	),
> +
> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
> +		  "corename=\"%s\" retval=%d",
> +		  __entry->sig, __entry->limit,
> +		  __entry->flags & MMF_DUMPABLE_MASK,
> +		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
> +		  MMF_DUMP_FILTER_SHIFT,
> +		  __get_str(name), __entry->retval)
> +);
>  #endif /* _TRACE_SIGNAL_H */

I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).

other parts looks good to me.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09  5:34           ` KOSAKI Motohiro
@ 2009-12-09 16:07             ` Masami Hiramatsu
  2009-12-09 16:16               ` Masami Hiramatsu
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 16:07 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron



KOSAKI Motohiro wrote:
>> +	TP_fast_assign(
>> +		__entry->sig	= (int)cprm->signr;
>> +		__entry->limit	= cprm->limit;
>> +		__entry->flags	= cprm->mm_flags;
>> +		__entry->retval	= retval;
>> +		__assign_str(name,	core_name);
>> +	),
>> +
>> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
>> +		  "corename=\"%s\" retval=%d",
>> +		  __entry->sig, __entry->limit,
>> +		  __entry->flags&  MMF_DUMPABLE_MASK,
>> +		  (__entry->flags&  MMF_DUMP_FILTER_MASK)>>
>> +		  MMF_DUMP_FILTER_SHIFT,
>> +		  __get_str(name), __entry->retval)
>> +);
>>   #endif /* _TRACE_SIGNAL_H */
>
> I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
> plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).

Ah, I missed it. OK, so I'll rename core_limit and add core_pipe_limit.
Thank you for pointed it out :-)

>
> other parts looks good to me.
>
>
>

Thanks!

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 16:07             ` Masami Hiramatsu
@ 2009-12-09 16:16               ` Masami Hiramatsu
  2009-12-09 20:34                 ` [PATCH v4] " Masami Hiramatsu
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 16:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Ingo Molnar, Andrew Morton, lkml, systemtap, DLE, Oleg Nesterov,
	Roland McGrath, Jason Baron

Masami Hiramatsu wrote:
>
>
> KOSAKI Motohiro wrote:
>>> +	TP_fast_assign(
>>> +		__entry->sig	= (int)cprm->signr;
>>> +		__entry->limit	= cprm->limit;
>>> +		__entry->flags	= cprm->mm_flags;
>>> +		__entry->retval	= retval;
>>> +		__assign_str(name,	core_name);
>>> +	),
>>> +
>>> +	TP_printk("sig=%d limit=%lu dumpable=0x%lx dump_filter=0x%lx "
>>> +		  "corename=\"%s\" retval=%d",
>>> +		  __entry->sig, __entry->limit,
>>> +		  __entry->flags&   MMF_DUMPABLE_MASK,
>>> +		  (__entry->flags&   MMF_DUMP_FILTER_MASK)>>
>>> +		  MMF_DUMP_FILTER_SHIFT,
>>> +		  __get_str(name), __entry->retval)
>>> +);
>>>    #endif /* _TRACE_SIGNAL_H */
>>
>> I don't think "limit" is userfriendly name, core_limit or core_size_limit is better?
>> plus, we have core_pipe_limit sysctl too. (it's similar but different concept limit).
>
> Ah, I missed it. OK, so I'll rename core_limit and add core_pipe_limit.

Hmm, perhaps, would we need a pair of core_pipe_limit and dump_count?
because it limits the number of concurrently dump-to-pipe and the number
is stored on dump_count.
Or, maybe it is enough to trace current parameters, because if we hit the
core_pipe_limit, we can see -EFBIG at retval parameter.


> Thank you for pointed it out :-)
>
>>
>> other parts looks good to me.
>>
>>
>>
>
> Thanks!
>

-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 16:16               ` Masami Hiramatsu
@ 2009-12-09 20:34                 ` Masami Hiramatsu
  2009-12-10  0:09                   ` KOSAKI Motohiro
  0 siblings, 1 reply; 14+ messages in thread
From: Masami Hiramatsu @ 2009-12-09 20:34 UTC (permalink / raw)
  To: Ingo Molnar, Andrew Morton, lkml, KOSAKI Motohiro
  Cc: systemtap, DLE, Masami Hiramatsu, Oleg Nesterov, Roland McGrath,
	Jason Baron, Ingo Molnar, Andrew Morton, KOSAKI Motohiro

Add signal coredump tracepoint which shows signal number,
mm->flags, core file size limitation, the result of
coredump, and core file name.

This tracepoint requirement comes mainly from the viewpoint of
administrators. Since now we have introduced many coredump
configurations (e.g. dumpable, coredump_filter, core_pattern,
etc) and some of them can be modified by users, it will be hard
to know what was actually dumped (or not dumped) after some
problem happened on the system. For example, a process didn't
generated core, coredump doesn't have some sections, etc.
In those cases, the coredump tracepoint can help us to know
why the core file is so big or small, or not generated, by
recording all configurations for all processes on the system.
That will reduce system-administration cost.

Changes in v4:
 - Rename limit trace-argument to core_size_limit, because
   of user friendly output.

Changes in v3:
 - Move tracepoint at the end of do_coredump() for tracing
   the result of coredump.
 - Use retval to record error-code at every failure points
   for passing the result of coredump to tracepoint.
 - Trace retval instead of cprm->file for recording the
   result of coredump.

Changes in v2:
 - Fix a bug to clear file local variable when
   call_usermodehelper_pipe() is failed.

Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---

 fs/exec.c                     |   58 ++++++++++++++++++++++++++++++-----------
 include/trace/events/signal.h |   48 ++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 0a5d944..d67ed5a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <trace/events/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1753,7 +1754,7 @@ static void wait_for_dump_helpers(struct file *file)
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
-	char corename[CORENAME_MAX_SIZE + 1];
+	char corename[CORENAME_MAX_SIZE + 1] = "";
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	struct inode * inode;
@@ -1768,6 +1769,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.signr = signr,
+		.file = NULL,
 		.regs = regs,
 		.limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
 		/*
@@ -1781,8 +1783,10 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	audit_core_dumps(signr);
 
 	binfmt = mm->binfmt;
-	if (!binfmt || !binfmt->core_dump)
+	if (!binfmt || !binfmt->core_dump) {
+		retval = -ENOSYS;
 		goto fail;
+	}
 
 	cred = prepare_creds();
 	if (!cred) {
@@ -1795,6 +1799,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 * If another thread got here first, or we are not dumpable, bail out.
 	 */
 	if (mm->core_state || !__get_dumpable(cprm.mm_flags)) {
+		/* This is not an error. retval should be 0 */
 		up_write(&mm->mmap_sem);
 		put_cred(cred);
 		goto fail;
@@ -1833,11 +1838,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	ispipe = format_corename(corename, signr);
 	unlock_kernel();
 
-	if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
+	if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) {
+		retval = -EFBIG;
 		goto fail_unlock;
+	}
 
  	if (ispipe) {
 		if (cprm.limit == 0) {
+			retval = -EINVAL;
 			/*
 			 * Normally core limits are irrelevant to pipes, since
 			 * we're not writing to the file system, but we use
@@ -1861,6 +1869,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		dump_count = atomic_inc_return(&core_dump_count);
 		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+			retval = -EFBIG;
 			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
 			       task_tgid_vnr(current), current->comm);
 			printk(KERN_WARNING "Skipping core dump\n");
@@ -1869,6 +1878,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 
 		helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
 		if (!helper_argv) {
+			retval = -ENOMEM;
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
 			goto fail_dropcount;
@@ -1877,8 +1887,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.limit = RLIM_INFINITY;
 
 		/* SIGPIPE can happen, but it's just never processed */
-		if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
-				&cprm.file)) {
+		retval = call_usermodehelper_pipe(helper_argv[0], helper_argv,
+						  NULL, &cprm.file);
+		if (retval < 0) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
 			goto fail_dropcount;
@@ -1887,31 +1898,43 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		cprm.file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
-	if (IS_ERR(cprm.file))
+	if (IS_ERR(cprm.file)) {
+		retval = (int)PTR_ERR(cprm.file);
 		goto fail_dropcount;
+	}
 	inode = cprm.file->f_path.dentry->d_inode;
-	if (inode->i_nlink > 1)
+	if (inode->i_nlink > 1) {
+		retval = -EMLINK;
 		goto close_fail;	/* multiple links - don't dump */
-	if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
+	}
+	if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 
 	/* AK: actually i see no reason to not allow this for named pipes etc.,
 	   but keep the previous behaviour for now. */
-	if (!ispipe && !S_ISREG(inode->i_mode))
+	if (!ispipe && !S_ISREG(inode->i_mode)) {
+		retval = -EBADF;
 		goto close_fail;
+	}
 	/*
 	 * Dont allow local users get cute and trick others to coredump
 	 * into their pre-created files:
 	 */
-	if (inode->i_uid != current_fsuid())
+	if (inode->i_uid != current_fsuid()) {
+		retval = -EPERM;
 		goto close_fail;
-	if (!cprm.file->f_op)
-		goto close_fail;
-	if (!cprm.file->f_op->write)
-		goto close_fail;
-	if (!ispipe &&
-	    do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
+	}
+	if (!cprm.file->f_op || !cprm.file->f_op->write) {
+		retval = -EINVAL;
 		goto close_fail;
+	}
+	if (!ispipe) {
+		retval = do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file);
+		if (retval != 0)
+			goto close_fail;
+	}
 
 	retval = binfmt->core_dump(&cprm);
 
@@ -1932,5 +1955,8 @@ fail_unlock:
 	put_cred(cred);
 	coredump_finish(mm);
 fail:
+	/* Trace coredump parameters and return value */
+	trace_signal_coredump(&cprm, corename, retval);
+
 	return;
 }
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index a510b75..6dbc856 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -4,8 +4,10 @@
 #if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_SIGNAL_H
 
+#include <linux/err.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/binfmts.h>
 #include <linux/tracepoint.h>
 
 #define TP_STORE_SIGINFO(__entry, info)				\
@@ -167,6 +169,52 @@ TRACE_EVENT(signal_lose_info,
 	TP_printk("sig=%d group=%d errno=%d code=%d",
 		  __entry->sig, __entry->group, __entry->errno, __entry->code)
 );
+
+/**
+ * signal_coredump - called when dumping core by signal
+ * @cprm: pointer to struct coredump_params
+ * @core_name: core-name string
+ * @retval: return value of binfmt->coredump or error-code
+ *
+ * Current process dumps core file to 'core_name' file, because 'cprm->signr'
+ * signal is delivered.
+ * 'retval' is an error code or 0/1. retval == 1 means the core file was
+ * dumped successfully and retval == 0 means binfmt->coredump failed to dump.
+ * If retval < 0, this means do_coredump() failed to dump core file before
+ * calling binfmt->coredump.
+ */
+TRACE_EVENT(signal_coredump,
+
+	TP_PROTO(struct coredump_params *cprm, const char *core_name,
+		 int retval),
+
+	TP_ARGS(cprm, core_name, retval),
+
+	TP_STRUCT__entry(
+		__field(	int,		sig		)
+		__field(	unsigned long,	core_size_limit	)
+		__field(	unsigned long,	flags		)
+		__field(	int,		retval		)
+		__string(	name,		core_name	)
+	),
+
+
+	TP_fast_assign(
+		__entry->sig			= (int)cprm->signr;
+		__entry->core_size_limit	= cprm->limit;
+		__entry->flags			= cprm->mm_flags;
+		__entry->retval			= retval;
+		__assign_str(name,		core_name);
+	),
+
+	TP_printk("sig=%d core_size_limit=%lu dumpable=0x%lx dump_filter=0x%lx"
+		  " corename=\"%s\" retval=%d",
+		  __entry->sig, __entry->core_size_limit,
+		  __entry->flags & MMF_DUMPABLE_MASK,
+		  (__entry->flags & MMF_DUMP_FILTER_MASK) >>
+		  MMF_DUMP_FILTER_SHIFT,
+		  __get_str(name), __entry->retval)
+);
 #endif /* _TRACE_SIGNAL_H */
 
 /* This part must be outside protection */


-- 
Masami Hiramatsu

Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division

e-mail: mhiramat@redhat.com

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4] [RFC] tracepoint: Add signal coredump tracepoint
  2009-12-09 20:34                 ` [PATCH v4] " Masami Hiramatsu
@ 2009-12-10  0:09                   ` KOSAKI Motohiro
  0 siblings, 0 replies; 14+ messages in thread
From: KOSAKI Motohiro @ 2009-12-10  0:09 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kosaki.motohiro, Ingo Molnar, Andrew Morton, lkml, systemtap,
	DLE, Oleg Nesterov, Roland McGrath, Jason Baron

> Add signal coredump tracepoint which shows signal number,
> mm->flags, core file size limitation, the result of
> coredump, and core file name.
> 
> This tracepoint requirement comes mainly from the viewpoint of
> administrators. Since now we have introduced many coredump
> configurations (e.g. dumpable, coredump_filter, core_pattern,
> etc) and some of them can be modified by users, it will be hard
> to know what was actually dumped (or not dumped) after some
> problem happened on the system. For example, a process didn't
> generated core, coredump doesn't have some sections, etc.
> In those cases, the coredump tracepoint can help us to know
> why the core file is so big or small, or not generated, by
> recording all configurations for all processes on the system.
> That will reduce system-administration cost.
> 
> Changes in v4:
>  - Rename limit trace-argument to core_size_limit, because
>    of user friendly output.

Looks good to me.


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2009-12-10  0:09 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <4B128ECF.9020906@redhat.com>
2009-12-02 20:42 ` [PATCH v2] [RFC] tracepoint: Add signal coredump tracepoint Masami Hiramatsu
2009-12-03 10:40   ` Ingo Molnar
2009-12-03 11:30     ` Masami Hiramatsu
2009-12-05  7:17       ` Ingo Molnar
2009-12-07 17:19         ` Masami Hiramatsu
2009-12-05  7:18   ` KOSAKI Motohiro
2009-12-07 15:25     ` Masami Hiramatsu
2009-12-08  1:51       ` KOSAKI Motohiro
2009-12-08 20:35         ` [PATCH v3] " Masami Hiramatsu
2009-12-09  5:34           ` KOSAKI Motohiro
2009-12-09 16:07             ` Masami Hiramatsu
2009-12-09 16:16               ` Masami Hiramatsu
2009-12-09 20:34                 ` [PATCH v4] " Masami Hiramatsu
2009-12-10  0:09                   ` KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).