public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!)
@ 2006-09-22  0:01 Mathieu Desnoyers
       [not found] ` <451331A1.3020601@goop.org>
       [not found] ` <1160189237.21768.47.camel@localhost.localdomain>
  0 siblings, 2 replies; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-09-22  0:01 UTC (permalink / raw)
  To: Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Mathieu Desnoyers, Paul Mundt,
	linux-kernel, Jes Sorensen, Tom Zanussi, Richard J Moore,
	Michel Dagenais, Christoph Hellwig, Greg Kroah-Hartman,
	Thomas Gleixner, William Cohen, ltt-dev, systemtap, Alan Cox,
	Jeremy Fitzhardinge

Hi,

Still thinking about Frank's comment about type checking, I came up with this
type checking scheme where the marker loading infrastructure checks for
consistency of the format string passed as parameter by the probe with the
marker before probe setup.

The probe itself uses the compiler to make sure that its arguments follow the
format string.

Thanks for all the comments,

Mathieu


---BEGIN---

--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1082,6 +1082,8 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+source "kernel/Kconfig.marker"
+
 source "ltt/Kconfig"
 
 endmenu
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,127 @@
+/*****************************************************************************
+ * marker.h
+ *
+ * Code markup for dynamic and static tracing.
+ *
+ * Example :
+ *
+ * MARK(subsystem_event, "%d %s", someint, somestring);
+ * Where :
+ * - Subsystem is the name of your subsystem.
+ * - event is the name of the event to mark.
+ * - "%d %s" is the formatted string for printk.
+ * - someint is an integer.
+ * - somestring is a char *.
+ * - subsystem_event must be unique throughout the kernel!
+ *
+ * Dynamically overridable function call based on marker mechanism
+ *          from Frank Ch. Eigler <fche@redhat.com>.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+
+#define MARK_KPROBE_PREFIX "__mark_kprobe_"
+#define MARK_CALL_PREFIX "__mark_call_"
+#define MARK_JUMP_SELECT_PREFIX "__mark_jump_select_"
+#define MARK_JUMP_CALL_PREFIX "__mark_jump_call_"
+#define MARK_JUMP_INLINE_PREFIX "__mark_jump_inline_"
+#define MARK_JUMP_OVER_PREFIX "__mark_jump_over_"
+#define MARK_FORMAT_PREFIX "__mark_format_"
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+#ifdef CONFIG_MARK_SYMBOL
+#define MARK_SYM(name) \
+	do { \
+		__label__ here; \
+		here: asm volatile \
+			(MARK_KPROBE_PREFIX#name " = %0" : : "m" (*&&here)); \
+	} while(0)
+#else 
+#define MARK_SYM(name)
+#endif
+
+#ifdef CONFIG_MARK_JUMP_CALL
+#define MARK_JUMP_CALL_PROTOTYPE(name) \
+	static marker_probe_func *__mark_call_##name \
+			asm (MARK_CALL_PREFIX#name) = \
+			__mark_empty_function
+#define MARK_JUMP_CALL(name, format, args...) \
+	do { \
+		preempt_disable(); \
+		(*__mark_call_##name)(format, ## args); \
+		preempt_enable_no_resched(); \
+	} while(0)
+#else
+#define MARK_JUMP_CALL_PROTOTYPE(name)
+#define MARK_JUMP_CALL(name, format, args...)
+#endif
+
+#ifdef CONFIG_MARK_JUMP_INLINE
+#define MARK_JUMP_INLINE(name, format, args...) \
+		(void) (__mark_inline_##name(format, ## args))
+#else
+#define MARK_JUMP_INLINE(name, format, args...)
+#endif
+
+#define MARK_JUMP(name, format, args...) \
+	do { \
+		__label__ over_label, call_label, inline_label; \
+		volatile static void *__mark_jump_select_##name \
+				asm (MARK_JUMP_SELECT_PREFIX#name) = \
+					&&over_label; \
+		volatile static void *__mark_jump_call_##name \
+				asm (MARK_JUMP_CALL_PREFIX#name) \
+				__attribute__((unused)) =  \
+					&&call_label; \
+		volatile static void *__mark_jump_inline_##name \
+				asm (MARK_JUMP_INLINE_PREFIX#name) \
+				__attribute__((unused)) =  \
+					&&inline_label; \
+		volatile static void *__mark_jump_over_##name \
+				asm (MARK_JUMP_OVER_PREFIX#name) \
+				__attribute__((unused)) =  \
+					&&over_label; \
+		static const char *__mark_format_##name \
+				asm (MARK_FORMAT_PREFIX#name) \
+				__attribute__((unused)) = \
+					format; \
+		MARK_JUMP_CALL_PROTOTYPE(name); \
+		goto *__mark_jump_select_##name; \
+call_label: \
+		MARK_JUMP_CALL(name, format, ## args); \
+		goto over_label; \
+inline_label: \
+		MARK_JUMP_INLINE(name, format, ## args); \
+over_label: \
+		do {} while(0); \
+	} while(0)
+
+#define MARK(name, format, args...) \
+	do { \
+		__mark_check_format(format, ## args); \
+		MARK_SYM(name); \
+		MARK_JUMP(name, format, ## args); \
+	} while(0)
+
+enum marker_type { MARKER_CALL, MARKER_INLINE };
+
+typedef asmlinkage void marker_probe_func(const char *fmt, ...);
+
+static inline __attribute__ ((format (printf, 1, 2)))
+void __mark_check_format(const char *fmt, ...)
+{ }
+
+extern marker_probe_func __mark_empty_function;
+
+int marker_set_probe(const char *name, const char *format,
+			marker_probe_func *probe,
+			enum marker_type type);
+
+void marker_disable_probe(const char *name, marker_probe_func *probe,
+			enum marker_type type);
--- /dev/null
+++ b/kernel/Kconfig.marker
@@ -0,0 +1,31 @@
+# Code markers configuration
+
+menu "Marker configuration"
+
+
+config MARK_SYMBOL
+	bool "Replace markers with symbols"
+	default n
+	help
+	  Put symbols in place of markers, useful for kprobe.
+
+config MARK_JUMP_CALL
+	bool "Replace markers with a jump over an inactive function call"
+	default n
+	help
+	  Put a jump over a call in place of markers.
+
+config MARK_JUMP_INLINE
+	bool "Replace markers with a jump over an inline function"
+	default n
+	help
+	  Put a jump over an inline function.
+
+config MARK_JUMP
+	bool "Jump marker probes set/disable infrastructure"
+	select KALLSYMS
+	default n
+	help
+	  Install or remove probes from markers.
+
+endmenu
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_MARK_JUMP) += marker.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--- /dev/null
+++ b/kernel/marker.c
@@ -0,0 +1,196 @@
+/*****************************************************************************
+ * marker.c
+ *
+ * Code markup for dynamic and static tracing. Marker control module.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ *
+ * Design :
+ * kernel/marker.c deals with all marker activation from a centralized,
+ * coherent mechanism. The functions that will be called will simply sit in
+ * modules.
+ *
+ * Before activating a probe, the marker module :
+ * 1 - takes proper locking
+ * 2 - verifies that the function pointer and jmp target are at their default
+ *     values, otherwise the "set" operation fails.
+ * 3 - does function pointer and jump setup.
+ *
+ * Setting them back to disabled is :
+ * 1 - setting back the default jmp and call values
+ * 2 - call synchronize_sched()
+ * 
+ * A probe module must call marker disable on all its markers before module
+ * unload.
+ *
+ * The marker module will also deal with inline jump selection, which is
+ * the same case as presented here, but without the function pointer.
+ */
+
+
+#include <linux/marker.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/string.h>
+
+static DEFINE_SPINLOCK(marker_lock);
+
+struct marker_pointers {
+	void **call;
+	void **jmpselect;
+	void **jmpcall;
+	void **jmpinline;
+	void **jmpover;
+	void **format;
+};
+
+asmlinkage void __mark_empty_function(const char *fmt, ...)
+{
+}
+EXPORT_SYMBOL(__mark_empty_function);
+
+/* Pointers can be used around preemption disabled */
+static int marker_get_pointers(const char *name,
+	struct marker_pointers *ptrs)
+{
+	char call_sym[KSYM_NAME_LEN] = MARK_CALL_PREFIX;
+	char jmpselect_sym[KSYM_NAME_LEN] = MARK_JUMP_SELECT_PREFIX;
+	char jmpcall_sym[KSYM_NAME_LEN] = MARK_JUMP_CALL_PREFIX;
+	char jmpinline_sym[KSYM_NAME_LEN] = MARK_JUMP_INLINE_PREFIX;
+	char jmpover_sym[KSYM_NAME_LEN] = MARK_JUMP_OVER_PREFIX;
+	char format_sym[KSYM_NAME_LEN] = MARK_FORMAT_PREFIX;
+	unsigned int call_sym_len = sizeof(MARK_CALL_PREFIX);
+	unsigned int jmpselect_sym_len = sizeof(MARK_JUMP_SELECT_PREFIX);
+	unsigned int jmpcall_sym_len = sizeof(MARK_JUMP_CALL_PREFIX);
+	unsigned int jmpinline_sym_len = sizeof(MARK_JUMP_INLINE_PREFIX);
+	unsigned int jmpover_sym_len = sizeof(MARK_JUMP_OVER_PREFIX);
+	unsigned int format_sym_len = sizeof(MARK_FORMAT_PREFIX);
+
+	strncat(call_sym, name, KSYM_NAME_LEN-call_sym_len);
+	strncat(jmpselect_sym, name, KSYM_NAME_LEN-jmpselect_sym_len);
+	strncat(jmpcall_sym, name, KSYM_NAME_LEN-jmpcall_sym_len);
+	strncat(jmpinline_sym, name, KSYM_NAME_LEN-jmpinline_sym_len);
+	strncat(jmpover_sym, name, KSYM_NAME_LEN-jmpover_sym_len);
+	strncat(format_sym, name, KSYM_NAME_LEN-format_sym_len);
+
+	ptrs->call = (void**)kallsyms_lookup_name(call_sym);
+	ptrs->jmpselect = (void**)kallsyms_lookup_name(jmpselect_sym);
+	ptrs->jmpcall = (void**)kallsyms_lookup_name(jmpcall_sym);
+	ptrs->jmpinline = (void**)kallsyms_lookup_name(jmpinline_sym);
+	ptrs->jmpover = (void**)kallsyms_lookup_name(jmpover_sym);
+	ptrs->format = (void**)kallsyms_lookup_name(format_sym);
+
+	if (!(ptrs->call && ptrs->jmpselect && ptrs->jmpcall
+		&& ptrs->jmpinline && ptrs->jmpover && ptrs->format)) {
+		return ENOENT;
+	}
+	return 0;
+}
+
+int marker_set_probe(const char *name, const char *format,
+		marker_probe_func *probe,
+		enum marker_type type)
+{
+	int result = 0;
+	struct marker_pointers ptrs;
+
+	spin_lock(&marker_lock);
+	result = marker_get_pointers(name, &ptrs);
+	if (result) {
+		printk(KERN_NOTICE
+			"Unable to find kallsyms for markers in %s\n",
+			name);
+		goto unlock;
+	}
+
+	switch(type) {
+		case MARKER_CALL:
+			if (*ptrs.call != __mark_empty_function) {
+				result = EBUSY;
+				printk(KERN_NOTICE
+					"Probe already installed on "
+					"marker in %s\n",
+					name);
+				goto unlock;
+			}
+			/* Check the types if format provided by probe */
+			if (format && strncmp(format, *ptrs.format,
+					MARK_MAX_FORMAT_LEN)) {
+				result = EPERM;
+				printk(KERN_NOTICE
+					"Format mismatch for probe %s "
+					"(%s), marker (%s)\n",
+					name,
+					format,
+					(const char*)*ptrs.format);
+				goto unlock;
+			}
+			/* Setup the call pointer */
+			*ptrs.call = probe;
+			/* Setup the jump */
+			*ptrs.jmpselect = *ptrs.jmpcall;
+			break;
+		case MARKER_INLINE:
+			if (*ptrs.jmpover == *ptrs.jmpinline) {
+				result = ENODEV;
+				printk(KERN_NOTICE
+					"No inline probe exists "
+					"for marker in %s\n",
+					name);
+				goto unlock;
+			}
+			/* Setup the call pointer */
+			*ptrs.call = __mark_empty_function;
+			/* Setup the jump */
+			*ptrs.jmpselect = *ptrs.jmpinline;
+			break;
+		default:
+			result = ENOENT;
+			printk(KERN_ERR
+				"Unknown marker type\n");
+			break;
+	}
+unlock:
+	spin_unlock(&marker_lock);
+	return result;
+}
+EXPORT_SYMBOL_GPL(marker_set_probe);
+
+void marker_disable_probe(const char *name, marker_probe_func *probe,
+		enum marker_type type)
+{
+	int result = 0;
+	struct marker_pointers ptrs;
+
+	spin_lock(&marker_lock);
+	result = marker_get_pointers(name, &ptrs);
+	if (result)
+		goto unlock;
+
+	switch(type) {
+		case MARKER_CALL:
+			if (*ptrs.call == probe) {
+				*ptrs.jmpselect = *ptrs.jmpover;
+				*ptrs.call = __mark_empty_function;
+			}
+			break;
+		case MARKER_INLINE:
+			if (*ptrs.jmpselect == *ptrs.jmpinline)
+				*ptrs.jmpselect = *ptrs.jmpover;
+			break;
+		default:
+			result = ENOENT;
+			printk(KERN_ERR
+				"Unknown marker type\n");
+			break;
+	}
+unlock:
+	spin_unlock(&marker_lock);
+	if (!result && type == MARKER_CALL)
+		synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(marker_disable_probe);
---END---

---BEGIN---

/* probe.c
 *
 * Loads a function at a marker call site.
 *
 * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
 *
 * This file is released under the GPLv2.
 * See the file COPYING for more details.
 */

#include <linux/marker.h>
#include <linux/module.h>
#include <linux/kallsyms.h>

/* function to install */
#define DO_MARK1_FORMAT "%d"
/*
 * Probe handler installed on the "subsys_mark1" marker by init_module().
 * The format argument is not used; value is printed with printk.
 */
asmlinkage void do_mark1(const char *format, int value)
{
	/* Have the compiler check that value agrees with DO_MARK1_FORMAT. */
	__mark_check_format(DO_MARK1_FORMAT, value);
	printk("value is %d\n", value);
}

int init_module(void)
{
	return marker_set_probe("subsys_mark1", DO_MARK1_FORMAT,
			(marker_probe_func*)do_mark1,
			MARKER_CALL);
}

void cleanup_module(void)
{
	marker_disable_probe("subsys_mark1", (marker_probe_func*)do_mark1,
		MARKER_CALL);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Probe");

---END---


---BEGIN---

/* test-mark.c
 *
 */

#include <linux/marker.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>

/* Run-time value passed as the integer argument of the subsys_mark3 marker. */
int x=7;

/* Entry returned by create_proc_entry() for "testmark"; NULL until init_module() runs. */
struct proc_dir_entry *pentry = NULL;

/*
 * open() handler for the "testmark" proc entry. Passes through three marker
 * sites and then always fails the open with -EPERM.
 */
static int my_open(struct inode *inode, struct file *file)
{
	/* Constant integer argument only. */
	MARK(subsys_mark1, "%d", 1);
	/* Constant integer and string arguments. */
	MARK(subsys_mark2, "%d %s", 2, "blah2");
	/* Integer argument read from the global x. */
	MARK(subsys_mark3, "%d %s", x, "blah3");

	return -EPERM;
}


/* File operations for the proc entry; only open is provided. */
static struct file_operations my_operations = {
	.open = my_open,
};

int init_module(void)
{
	pentry = create_proc_entry("testmark", 0444, NULL);
	if (pentry)
		pentry->proc_fops = &my_operations;
	return 0;
}

/* Module exit point: remove the "testmark" proc entry created by init_module(). */
void cleanup_module(void)
{
	remove_proc_entry("testmark", NULL);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Marker Test");

---END---


OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!)
       [not found] ` <451331A1.3020601@goop.org>
@ 2006-09-22  2:11   ` Mathieu Desnoyers
       [not found]     ` <45134539.7070305@goop.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-09-22  2:11 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Paul Mundt, linux-kernel,
	Jes Sorensen, Tom Zanussi, Richard J Moore, Michel Dagenais,
	Christoph Hellwig, Greg Kroah-Hartman, Thomas Gleixner,
	William Cohen, ltt-dev, systemtap, Alan Cox

* Jeremy Fitzhardinge (jeremy@goop.org) wrote:
> Mathieu Desnoyers wrote:
> >+#ifdef CONFIG_MARK_SYMBOL
> >+#define MARK_SYM(name) \
> >+	do { \
> >+		__label__ here; \
> >+		here: asm volatile \
> >+			(MARK_KPROBE_PREFIX#name " = %0" : : "m" (*&&here)); 
> >\
> >+	} while(0)
> >+#else 
> >+#define MARK_SYM(name)
> >+#endif
> 
> BTW, this won't work if you put the MARK_SYM in a loop which gcc 
> unrolls; you'll only get the mark in the last unrolled iteration 
> (because the symbol assignments will override each other).
> 
> To make this work properly, you really need to put the mark entries into 
> a separate section, so that if gcc duplicates the code, you get 
> duplicated markers too.
> 

Good point, I will change it to :

/*
 * MARK_SYM(name): record the address of the marker site in a static pointer
 * whose assembler name is MARK_KPROBE_PREFIX followed by name. The kprobe
 * prefix is used (not MARK_CALL_PREFIX) so the symbol does not clash with the
 * __mark_call_<name> pointer declared by MARK_JUMP_CALL_PROTOTYPE.
 */
#define MARK_SYM(name) \
        do { \
                __label__ here; \
                volatile static void *__mark_kprobe_##name \
                        asm (MARK_KPROBE_PREFIX#name) \
                        __attribute__((unused)) = &&here; \
here: \
                do { } while(0); \
        } while(0)

Which fixes the problem. Some tests showed me that the compiler does not unroll
an otherwise unrolled loop when this specific macro is called. (test done with
-funroll-all-loops).

Regards,

Mathieu


OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!)
       [not found]     ` <45134539.7070305@goop.org>
@ 2006-09-22  2:24       ` Mathieu Desnoyers
       [not found]         ` <45135FA0.1030403@goop.org>
  2006-09-22  2:49       ` Mathieu Desnoyers
  1 sibling, 1 reply; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-09-22  2:24 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Paul Mundt, linux-kernel,
	Jes Sorensen, Tom Zanussi, Richard J Moore, Michel Dagenais,
	Christoph Hellwig, Greg Kroah-Hartman, Thomas Gleixner,
	William Cohen, ltt-dev, systemtap, Alan Cox

* Jeremy Fitzhardinge (jeremy@goop.org) wrote:
> Mathieu Desnoyers wrote:
> >#define MARK_SYM(name) \
> >        do { \
> >                __label__ here; \
> >                volatile static void *__mark_kprobe_##name \
> >                        asm (MARK_CALL_PREFIX#name) \
> >                        __attribute__((unused)) = &&here; \
> >here: \
> >                do { } while(0); \
> >        } while(0)
> >
> >Which fixes the problem. Some tests showed me that the compiler does not 
> >unroll
> >an otherwise unrolled loop when this specific macro is called. (test done 
> >with
> >-funroll-all-loops).
> 
> Eh?  I thought you wanted to avoid changing the generated code?  
> Inhibiting loop unrolling could be a pretty large change...
> 

Yes, if possible. But letting gcc duplicate those symbols brings many questions,
such as : how can we name each of them differently ? Is there any way to
automatically increment an "identifier" counter in assembly ?

Mathieu

OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!)
       [not found]     ` <45134539.7070305@goop.org>
  2006-09-22  2:24       ` Mathieu Desnoyers
@ 2006-09-22  2:49       ` Mathieu Desnoyers
  1 sibling, 0 replies; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-09-22  2:49 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Paul Mundt, linux-kernel,
	Jes Sorensen, Tom Zanussi, Richard J Moore, Michel Dagenais,
	Christoph Hellwig, Greg Kroah-Hartman, Thomas Gleixner,
	William Cohen, ltt-dev, systemtap, Alan Cox

* Jeremy Fitzhardinge (jeremy@goop.org) wrote:
> Mathieu Desnoyers wrote:
> >#define MARK_SYM(name) \
> >        do { \
> >                __label__ here; \
> >                volatile static void *__mark_kprobe_##name \
> >                        asm (MARK_CALL_PREFIX#name) \
> >                        __attribute__((unused)) = &&here; \
> >here: \
> >                do { } while(0); \
> >        } while(0)
> >
> >Which fixes the problem. Some tests showed me that the compiler does not 
> >unroll
> >an otherwise unrolled loop when this specific macro is called. (test done 
> >with
> >-funroll-all-loops).
> 
> Eh?  I thought you wanted to avoid changing the generated code?  
> Inhibiting loop unrolling could be a pretty large change...
> 

From what I see in 2.6.17/2.6.18 makefiles, only -Os and -O2 are generally 
used by the build system (no -O3) and there is no use of -funroll-loops. I
guess it must not be so useful in this context.

Mathieu


OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!)
       [not found]         ` <45135FA0.1030403@goop.org>
@ 2006-09-22 15:38           ` Mathieu Desnoyers
  0 siblings, 0 replies; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-09-22 15:38 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Paul Mundt, linux-kernel,
	Jes Sorensen, Tom Zanussi, Richard J Moore, Michel Dagenais,
	Christoph Hellwig, Greg Kroah-Hartman, Thomas Gleixner,
	William Cohen, ltt-dev, systemtap, Alan Cox

* Jeremy Fitzhardinge (jeremy@goop.org) wrote:
> Mathieu Desnoyers wrote:
> >* Jeremy Fitzhardinge (jeremy@goop.org) wrote:
> >  
> >>Mathieu Desnoyers wrote:
> >>    
> >>>#define MARK_SYM(name) \
> >>>       do { \
> >>>               __label__ here; \
> >>>               volatile static void *__mark_kprobe_##name \
> >>>                       asm (MARK_CALL_PREFIX#name) \
> >>>                       __attribute__((unused)) = &&here; \
> >>>here: \
> >>>               do { } while(0); \
> >>>       } while(0)
> >>>
> >>>Which fixes the problem. Some tests showed me that the compiler does not 
> >>>unroll
> >>>an otherwise unrolled loop when this specific macro is called. (test 
> >>>done with
> >>>-funroll-all-loops).
> >>>      
> >>Eh?  I thought you wanted to avoid changing the generated code?  
> >>Inhibiting loop unrolling could be a pretty large change...
> >>
> >>    
> >
> >Yes, if possible. But letting gcc duplicate those symbols brings many 
> >questions,
> >such as : how can we name each of them differently ? Is there any way to
> >automatically increment an "identifier" counter in assembly ?
> 
> Use a section instead:
> 
> struct marker {
> 	const char *name;
> 	const void *location;
> };
> 
> #define MARKER_SYM(name)
> 	do {
> 		__label__ here;
> 	here:	asm volatile(".section \".markers\"; .long %0, %1; 
> 	.previous" : : "m" (#name), "m" (*&&here));\
> 	} while(0);
> 
> Not a linker symbol, but it does let you find all the places containing 
> a particular mark.
> 

Very clever idea, as it lessens the impact on the compiler optimisations. Any
ideas about how we could fit in a list of "read" memory constraints based on a
vargs list in the macro ?

Mathieu

OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Proof of concept:  Logdev with "almost-non" intrusive markers.
       [not found] ` <1160189237.21768.47.camel@localhost.localdomain>
@ 2006-10-10 14:21   ` Mathieu Desnoyers
  0 siblings, 0 replies; 6+ messages in thread
From: Mathieu Desnoyers @ 2006-10-10 14:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: LKML, Martin Bligh, Frank Ch. Eigler, Masami Hiramatsu, prasanna,
	Andrew Morton, Ingo Molnar, Paul Mundt, Jes Sorensen,
	Tom Zanussi, Richard J Moore, Michel Dagenais, Christoph Hellwig,
	Greg Kroah-Hartman, Thomas Gleixner, William Cohen, ltt-dev,
	systemtap, Alan Cox, Jeremy Fitzhardinge

Hi Steven,

Those are great ideas! Kernel compile-time type checking is interesting, but has
the following disadvantages (compared to Linux Kernel Markers 0.20
  http://sources.redhat.com/ml/systemtap/2006-q3/msg00794.html) :

- It requires keeping a system-wide header file with all the marker function
  prototypes around. When a change is made in the code, the header must match.
  My goal being to provide a self-describing, one liner marker, such system-wide
  header file makes me uncomfortable.
- Knowing the number of parameters (LD_MARK[1-4]) instead of using variable
  arguments (MARK) adds the ability to use typeof() on the parameters, which is
  clearly great. On the downside, it multiplies the number of macros and limits
  the number of parameters that can be passed (I guess we will never do an
  LD_MARK10). Whether
    MARK4(label, args1, arg2, arg3, arg4)
  or
    MARK(label, format, arg1, arg2, arg3, arg4)
  is better is a matter of visual impact and level of self-description of the
  probe. While I personally prefer the format string because of its
  flexibility, it would be useful to hear other opinions about which approach
  is the less visually intrusive.

Regards,

Mathieu


* Steven Rostedt (rostedt@goodmis.org) wrote:
> OK, I've been catching up a little on the threads, but I'm still behind
> (4036 unread in LKML folder). But I wanted to show this proof of
> concept, just to get some more ideas rolling around.
> 
> Yesterday, I posted a way to save a location and register of a contained
> local variable.  Today I've implemented it in Logdev.  So my markers are
> almost non intrusive. The only thing that the markers do, is force gcc
> to put the wanted variables into a register at a certain point. 
> 
> Jeremy mentioned a way to also store the values in memory, but this is
> just a proof of concept, and further ideas can be built upon it.
> 
> 
> My logdev patch and tool set can be downloaded at
> http://rostedt.homelinux.com/logdev
> 
> The topic of this email is not about logdev, but about the markers.  So
> my patch on top of logdev is included here (also available for download
> at the mentioned url).
> 
> What did I do?
> 
> I've added 5 markers.
>   LD_MARK(label)
>   LD_MARK1(label,arg1)
>   LD_MARK2(label,arg1,arg2)
>   LD_MARK3(label,arg1,arg2,arg3)
>   LD_MARK4(label,arg1,arg2,arg3,arg4)
> 
> Here's an example of LD_MARK2: (defined in my patch at
> include/asm-i386/logdev_marker.h)
> 
> #define LD_MARK2(label, arg1, arg2)					\
> 	{								\
> 		extern void __logdev_caller__ ## label(typeof(arg1),	\
> 						       typeof(arg2));	\
> 		asm("1:"						\
> 		    ".section .__logdev_markers,\"a\"\n"		\
> 		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> 		    ".long 2\n"						\
> 		    "xorl %0, %0\n"					\
> 		    ".short 0\n"					\
> 		    "xorl %1, %1\n"					\
> 		    ".short 0\n"					\
> 		    ".previous"						\
> 		    : :							\
> 		    "r"(arg1), "r"(arg2));				\
> 	}
> 
> 
> 
> So basically in the elf section __logdev_markers  I have dynamic records
> that are sizeof(long) defined.  The records would look like this.
> 
> struct {
> 	unsigned long probe_address;
> 	unsigned long function_to_call;
> 	unsigned long num_of_args;
> 	unsigned long regs[0];
> };
> 
> My logdev program reads these in to a descriptor, and puts them into a
> linked list.  It calls an arch-specific function to translate what was in
> the regs[] record to some id that can be used later to find what
> register the argument is used in.
> 
> 
> Now note in the macro:
> 
> 	extern void __logdev_caller__ ## label(typeof(arg1),	\
> 					       typeof(arg2));	\
> 
> This forces strict type checking of the tracer to the function that is
> called.  How is that done?  Well in include/linux/logdev_marker.h I have
> all my tracing prototypes declared. If it doesn't match the marker, then
> I get a compile time error.
> 
> In linux/logdev_marker.h:
> 
> #define LDCALLER(caller) __logdev_caller__ ## caller
> 
> void LDCALLER(context_switch) (struct task_struct *prev,
> 			       struct task_struct *next);
> void LDCALLER(mm_fault) (struct mm_struct *mm, struct vm_area_struct *vma,
> 			 unsigned long address, int write_access);
> 
> 
> Here you see two trace functions. The markers I have for these is in
> context_switch and __handle_mm_fault respectively.
> 
> in kernel/sched.c:
> 
> static inline struct task_struct *
> context_switch(struct rq *rq, struct task_struct *prev,
> 	       struct task_struct *next)
> {
> 	struct mm_struct *mm = next->mm;
> 	struct mm_struct *oldmm = prev->active_mm;
> 
> 	LD_MARK2(context_switch, prev, next);
> 
> 
> in mm/memory.c:
> 
> int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> 		unsigned long address, int write_access)
> {
> 	pgd_t *pgd;
> 	pud_t *pud;
> 	pmd_t *pmd;
> 	pte_t *pte;
> 
> 	LD_MARK4(mm_fault, mm, vma, address, write_access);
> 
> 
> If the prototype doesn't match, then there's a compile time error.  That
> header file is also included in the same place that the trace functions
> are defined, so the match must be true there too.
> 
> These examples are just proof of concept.  The context_switch one is
> very hard to get kprobes to see prev and next, because context_switch is
> a static inline, and every modification of sched.c may make gcc
> optimize what it does with prev and next differently.  They are not
> passed as parameters.
> 
> The __handle_mm_fault can easily be done by jprobes, but I was testing
> my LD_MARK4 with it.
> 
> So what this proof-of-concept is doing, is showing a way to use kprobes
> with little to no performance hit to the binary, when tracing is off.
> We save on the L1 cache and barely change the optimization that gcc
> does.
> 
> This also shows a way to strongly type check the parameters used between
> the markers and the tracers that are using the markers.
> 
> What this proof-of-concept does *not* do, is solve the issues that LTTng
> are trying to solve, (as well as dprobes).   This only works with
> kprobes that are currently in the kernel, or basically any other "int 3"
> type method.
> 
> But this was done to get ideas out in the open, and perhaps this will
> trigger an idea in those that are much brighter than I, and we will have
> some utopia of a solution :-)
> 
> -------------
> 
> End of proof-of-concept
> 
> For those interested, here's a little description of what I've done with
> Logdev and the markers.
> 
> my logdev device has a /debugfs/logdev/marker file that shows what's
> been registered:
> 
> # cat /debugfs/logdev/marker
> probe address:  c0154599
> func address:   c0260ae3
> args:           4
> arg 0 reg:      eax
> arg 1 reg:      edx
> arg 2 reg:      ecx
> arg 3 reg:      ebx
> 
> probe address:  c02f8aa5
> func address:   c0260928
> args:           2
> arg 0 reg:      eax
> arg 1 reg:      edx
> 
> 
> Here we see that c0154599 is the address of where the marker is in
> __handle_mm_fault. Also the address of the function is shown, and number
> of args.  We also see what registers are used.
> 
> 
> To turn on the probes, an ioctl is called to that same file.  The logdev
> tool "logmark" can do this for you.  "logmark 1" turns on the probes,
> and "logmark 0" turns them off.
> 
> logread still shows the output of what is done:
> 
> # ./logread
> [...]
> [  367.829418] cpu:1 gnome-settings-:3858(115:120:115) -->> gnome-terminal:3875(116:120:116)
> [  367.829429] cpu:0 swapper:0(140:120:140) -->> logread:4323(116:120:116)
> [  367.829740] cpu:0 logread:4323  mm=0xf0f12200 vma=0xf293a41c  address=8065044  write_access=1
> [  367.830089] cpu:0 logread:4323(116:120:116) -->> swapper:0(140:120:140)
> [  367.835167] cpu:0 swapper:0(140:120:140) -->> logread:4323(115:120:115)
> [  367.835476] cpu:0 logread:4323  mm=0xf0f12200 vma=0xf293a41c  address=8066004  write_access=1
> 
> 
> Well if anyone wants to play with this.  The code is (once again) at
> http://rostedt.homelinux.com/logdev.  You will need to apply the two
> patches for 2.6.18:
> 
> http://rostedt.homelinux.com/logdev/logdev-0.5.3-2.6.18.patch
> http://rostedt.homelinux.com/logdev/logdev-markers-0.5.3-2.6.18.patch
> 
> and the tools are here
> 
> http://rostedt.homelinux.com/logdev/logdev-tools-0.5.3-1.tar.bz2
> 
> You need to build them yourself.
> 
> Currently, logdev-markers only works for i386. But it would be really
> easy to port it to any other arch that already has kprobes.
> 
> 
> Here's the marker patch:
> 
> Index: linux-2.6.18/include/asm-i386/logdev_marker.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.18/include/asm-i386/logdev_marker.h	2006-10-06 21:21:37.000000000 -0400
> @@ -0,0 +1,182 @@
> +/*
> + * logdev_marker.h
> + *
> + * Copyright - 2006 - Steven Rostedt, Red Hat Inc, (srostedt at redhat dot com)
> + */
> +#ifndef _ASM_LOGDEV_MARKER_H
> +#define _ASM_LOGDEV_MARKER_H
> +
> +
> +/*
> + * eax = 0
> + * ebx = 3
> + * ecx = 1
> + * edx = 2
> + * edi = 7
> + * ebp = 5
> + * esp = 4
> + * esi = 6
> + */
> +enum {
> +	LD_REGA = 0,
> +	LD_REGB = 3,
> +	LD_REGC = 1,
> +	LD_REGD = 2,
> +	LD_REGDI = 7,
> +	LD_REGBP = 5,
> +	LD_REGSP = 4,
> +	LD_REGSI = 6
> +};
> +
> +static inline int logdev_mark_get_reg(unsigned long op)
> +{
> +	/*
> +	 * Strip out the register:
> +	 */
> +	return (op >> 8) & 0x7;
> +}
> +
> +static inline unsigned long
> +logdev_mark_get_reg_content(int reg, struct pt_regs *regs)
> +{
> +	static int once;
> +
> +	switch (reg) {
> +	case LD_REGA:
> +		return regs->eax;
> +	case LD_REGB:
> +		return regs->ebx;
> +	case LD_REGC:
> +		return regs->ecx;
> +	case LD_REGD:
> +		return regs->edx;
> +	case LD_REGDI:
> +		return regs->edi;
> +	case LD_REGBP:
> +		return regs->ebp;
> +	case LD_REGSP:
> +		return regs->esp;
> +	case LD_REGSI:
> +		return regs->esi;
> +	default:
> +		if (!once) {
> +			printk("unknown reg type %d\n", reg);
> +			once = 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +
> +static inline const char *logdev_reg_to_name(int reg)
> +{
> +	switch (reg) {
> +	case LD_REGA:
> +		return "eax";
> +	case LD_REGB:
> +		return "ebx";
> +	case LD_REGC:
> +		return "ecx";
> +	case LD_REGD:
> +		return "edx";
> +	case LD_REGDI:
> +		return "edi";
> +	case LD_REGBP:
> +		return "ebp";
> +	case LD_REGSP:
> +		return "esp";
> +	case LD_REGSI:
> +		return "esi";
> +	}
> +	return "unknown reg!";
> +}
> +
> +#define _LD_STR(x) #x
> +#define LD_STR(x) _LD_STR(x)
> +
> +#define LD_MARK(label)							\
> +	{								\
> +		extern void __logdev_caller__ ## label(void);		\
> +		asm("1:"						\
> +		    ".section .__logdev_markers,\"a\"\n"		\
> +		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> +		    ".long 0\n"						\
> +		    ".previous"						\
> +		    : :	);						\
> +	}
> +
> +#define LD_MARK1(label, arg1)						\
> +	{								\
> +		extern void __logdev_caller__ ## label(typeof(arg1));	\
> +		asm("1:"						\
> +		    ".section .__logdev_markers,\"a\"\n"		\
> +		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> +		    ".long 1\n"						\
> +		    "xorl %0, %0\n"					\
> +		    ".short 0\n"					\
> +		    ".previous"						\
> +		    : :							\
> +		    "r"(arg1));						\
> +	}
> +
> +#define LD_MARK2(label, arg1, arg2)					\
> +	{								\
> +		extern void __logdev_caller__ ## label(typeof(arg1),	\
> +						       typeof(arg2));	\
> +		asm("1:"						\
> +		    ".section .__logdev_markers,\"a\"\n"		\
> +		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> +		    ".long 2\n"						\
> +		    "xorl %0, %0\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %1, %1\n"					\
> +		    ".short 0\n"					\
> +		    ".previous"						\
> +		    : :							\
> +		    "r"(arg1), "r"(arg2));				\
> +	}
> +
> +#define LD_MARK3(label, arg1, arg2, arg3)				\
> +	{								\
> +		extern void __logdev_caller__ ## label(typeof(arg1),	\
> +						       typeof(arg2),	\
> +						       typeof(arg3));	\
> +		asm("1:"						\
> +		    ".section .__logdev_markers,\"a\"\n"		\
> +		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> +		    ".long 3\n"						\
> +		    "xorl %0, %0\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %1, %1\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %2, %2\n"					\
> +		    ".short 0\n"					\
> +		    ".previous"						\
> +		    : :							\
> +		    "r"(arg1), "r"(arg2), "r"(arg3));			\
> +	}
> +
> +#define LD_MARK4(label, arg1, arg2, arg3, arg4)				\
> +	{								\
> +		extern void __logdev_caller__ ## label(typeof(arg1),	\
> +						       typeof(arg2),	\
> +						       typeof(arg3),	\
> +						       typeof(arg4));	\
> +		asm("1:"						\
> +		    ".section .__logdev_markers,\"a\"\n"		\
> +		    ".long 1b," LD_STR(__logdev_caller__ ## label) "\n"	\
> +		    ".long 4\n"						\
> +		    "xorl %0, %0\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %1, %1\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %2, %2\n"					\
> +		    ".short 0\n"					\
> +		    "xorl %3, %3\n"					\
> +		    ".short 0\n"					\
> +		    ".previous"						\
> +		    : :							\
> +		    "r"(arg1), "r"(arg2), "r"(arg3), "r"(arg4));	\
> +	}
> +
> +#endif /* _ASM_LOGDEV_MARKER_H */
> Index: linux-2.6.18/include/linux/logdev_marker.h
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.18/include/linux/logdev_marker.h	2006-10-06 21:08:40.000000000 -0400
> @@ -0,0 +1,30 @@
> +/*
> + * logdev_marker.h
> + *
> + * Copyright - 2006 - Steven Rostedt, Red Hat Inc, (srostedt at redhat dot com)
> + */
> +#ifndef _LINUX_LOGDEV_MARKER_H
> +#define _LINUX_LOGDEV_MARKER_H
> +
> +#ifdef CONFIG_LOGDEV_MARKER
> +
> +#include <asm/logdev_marker.h>
> +
> +#define LDCALLER(caller) __logdev_caller__ ## caller
> +
> +void LDCALLER(context_switch) (struct task_struct *prev,
> +			       struct task_struct *next);
> +void LDCALLER(mm_fault) (struct mm_struct *mm, struct vm_area_struct *vma,
> +			 unsigned long address, int write_access);
> +
> +#else
> +
> +#define LD_MARK(label)				do { } while(0)
> +#define LD_MARK1(label,arg1)			do { } while(0)
> +#define LD_MARK2(label,arg1,arg2)		do { } while(0)
> +#define LD_MARK3(label,arg1,arg2,arg3)		do { } while(0)
> +#define LD_MARK4(label,arg1,arg2,arg3,arg4)	do { } while(0)
> +
> +#endif /* CONFIG_LOGDEV_MARKER */
> +
> +#endif /* _LINUX_LOGDEV_MARKER_H */
> Index: linux-2.6.18/kernel/sched.c
> ===================================================================
> --- linux-2.6.18.orig/kernel/sched.c	2006-10-06 17:49:36.000000000 -0400
> +++ linux-2.6.18/kernel/sched.c	2006-10-06 21:05:44.000000000 -0400
> @@ -53,7 +53,7 @@
>  #include <linux/kprobes.h>
>  #include <linux/delayacct.h>
>  #include <asm/tlb.h>
> -
> +#include <linux/logdev_marker.h>
>  #include <linux/logdev.h>
>  
>  #include <asm/unistd.h>
> @@ -1806,6 +1806,8 @@ context_switch(struct rq *rq, struct tas
>  	struct mm_struct *mm = next->mm;
>  	struct mm_struct *oldmm = prev->active_mm;
>  
> +	LD_MARK2(context_switch, prev, next);
> +
>  	if (unlikely(!mm)) {
>  		next->active_mm = oldmm;
>  		atomic_inc(&oldmm->mm_count);
> Index: linux-2.6.18/arch/i386/Kconfig.debug
> ===================================================================
> --- linux-2.6.18.orig/arch/i386/Kconfig.debug	2006-10-06 18:25:05.000000000 -0400
> +++ linux-2.6.18/arch/i386/Kconfig.debug	2006-10-06 20:59:41.000000000 -0400
> @@ -76,6 +76,11 @@ config X86_MPPARSE
>  	depends on X86_LOCAL_APIC && !X86_VISWS
>  	default y
>  
> +config LOGDEV_MARKER
> +	bool
> +	depends on LOGDEV && KPROBES
> +	default y
> +
>  config DOUBLEFAULT
>  	default y
>  	bool "Enable doublefault exception handler" if EMBEDDED
> Index: linux-2.6.18/arch/i386/kernel/vmlinux.lds.S
> ===================================================================
> --- linux-2.6.18.orig/arch/i386/kernel/vmlinux.lds.S	2006-10-06 18:20:17.000000000 -0400
> +++ linux-2.6.18/arch/i386/kernel/vmlinux.lds.S	2006-10-06 18:24:13.000000000 -0400
> @@ -44,6 +44,15 @@ SECTIONS
>    }
>    __tracedata_end = .;
>  
> +#ifdef CONFIG_LOGDEV_MARKER
> +  . = ALIGN(4);
> +  __logdev_marker_start = .;
> +  .__logdev_markers : AT(ADDR(.__logdev_markers) - LOAD_OFFSET) {
> +	*(.__logdev_markers)
> +  }
> +  __logdev_marker_end = .;
> +#endif
> +
>    /* writeable */
>    .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
>  	*(.data)
> Index: linux-2.6.18/drivers/char/Makefile
> ===================================================================
> --- linux-2.6.18.orig/drivers/char/Makefile	2006-10-06 18:26:16.000000000 -0400
> +++ linux-2.6.18/drivers/char/Makefile	2006-10-06 18:26:46.000000000 -0400
> @@ -100,6 +100,7 @@ obj-$(CONFIG_LOGDEV)		+= logdev.o
>  obj-$(CONFIG_LOGDEV_PROBE)	+= logdev_probe.o
>  obj-$(CONFIG_LOGDEV_RINGBUF)	+= logdev_ringbuf.o
>  obj-$(CONFIG_LOGDEV_RELAY)	+= logdev_relay.o
> +obj-$(CONFIG_LOGDEV_MARKER)	+= logdev_marker.o
>  
>  obj-$(CONFIG_HANGCHECK_TIMER)	+= hangcheck-timer.o
>  obj-$(CONFIG_TCG_TPM)		+= tpm/
> Index: linux-2.6.18/drivers/char/logdev_marker.c
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6.18/drivers/char/logdev_marker.c	2006-10-06 21:45:40.000000000 -0400
> @@ -0,0 +1,497 @@
> +/*
> + * logdev_marker.c
> + *
> + * Copyright (C) 2006 Steven Rostedt <steven.rostedt@kihontech.com>
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License (not later!)
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + *
> + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> + */
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/spinlock.h>
> +#include <linux/logdev.h>
> +#include <linux/uaccess.h>
> +#include <linux/seq_file.h>
> +#include <linux/debugfs.h>
> +#include <linux/kprobes.h>
> +#include <linux/ptrace.h>
> +#include <linux/kallsyms.h>
> +#include <linux/list.h>
> +#include <linux/logdev_marker.h>
> +
> +#include <asm/atomic.h>
> +
> +#include "logdev_priv.h"
> +
> +#define MAX_ARGS 4
> +
> +#undef DPRINTK
> +#if 1
> +#  define DPRINTK(x...) printk(x)
> +#else
> +#  define DPRINTK(x...) do { } while(0)
> +#endif
> +
> +static LIST_HEAD(logdev_probes);
> +
> +typedef void (*logdev_func0_t)(void);
> +typedef void (*logdev_func1_t)(unsigned long);
> +typedef void (*logdev_func2_t)(unsigned long, unsigned long);
> +typedef void (*logdev_func3_t)(unsigned long, unsigned long,
> +			       unsigned long);
> +typedef void (*logdev_func4_t)(unsigned long, unsigned long,
> +			       unsigned long, unsigned long);
> +
> +union logdev_func {
> +	logdev_func0_t func0;
> +	logdev_func1_t func1;
> +	logdev_func2_t func2;
> +	logdev_func3_t func3;
> +	logdev_func4_t func4;
> +};
> +
> +struct logdev_mark_probe {
> +	struct list_head list;
> +	struct kprobe kp;
> +	union logdev_func func;
> +	unsigned long address;
> +	int args;
> +	int arg_regs[MAX_ARGS];
> +};
> +
> +/* ---------------- cut here for user space headers -------------------- */
> +
> +#define LOGMARK_IOCTL_BASE 'm'
> +
> +#define MARK_IO(nr) _IO(LOGMARK_IOCTL_BASE, nr)
> +#define MARK_IOW(nr, type) _IOW(LOGMARK_IOCTL_BASE, nr, type)
> +
> +#define LOGMARK_START  MARK_IO(0)
> +#define LOGMARK_STOP   MARK_IO(1)
> +
> +#define LOGMARK_ID_MARK		0x56580000
> +
> +#define LOGMARK_CNTXTSW		0x56580001
> +#define LOGMARK_MM_FAULT	0x56580002
> +
> +struct logdev_mark_hdr {
> +	unsigned long long t;
> +	int id;
> +};
> +
> +struct logdev_mark_switch {
> +	struct logdev_mark_hdr hdr;
> +	short pid_prev;
> +	short pid_next;
> +	int prev_prio;
> +	int prev_static_prio;
> +	int prev_normal_prio;
> +	int prev_state;
> +	int next_prio;
> +	int next_static_prio;
> +	int next_normal_prio;
> +	char prev_comm[TASK_COMM_LEN];
> +	char next_comm[TASK_COMM_LEN];
> +};
> +
> +struct logdev_mark_mm_fault {
> +	struct logdev_mark_hdr hdr;
> +	short pid;
> +	char comm[TASK_COMM_LEN];
> +	struct mm_struct *mm;
> +	struct vm_area_struct *vma;
> +	unsigned long address;
> +	int write_access;
> +};
> +
> +
> +/* ---------------- end of user space header cut ---------------- */
> +
> +struct logdev_probe_hdr {
> +	unsigned long addr;
> +	unsigned long offset;
> +	int func_symbol_size;
> +	char func_symbol[KSYM_NAME_LEN+1];
> +};
> +
> +
> +/* TODO, put this in logdev.c so I don't keep copying it */
> +static void logdev_print_time_cpu(unsigned long long t, int cpu)
> +{
> +	unsigned long usec_rem;
> +	unsigned long secs;
> +
> +	usec_rem = do_div(t, 1000000000)/1000;
> +	secs = (unsigned long)t;
> +
> +	printk("[%5lu.%06lu] cpu:%d ",
> +	       secs, usec_rem, cpu);
> +}
> +
> +/* ------------------- cut here for user space print -------------- */
> +
> +/* "s/printk/printf" */
> +
> +static void logdev_print_hdr(int cpu,
> +			     struct logdev_mark_hdr *hdr)
> +{
> +	logdev_print_time_cpu(hdr->t, cpu);
> +}
> +
> +static void logdev_mark_switch_print(int cpu, int size,
> +				     struct logdev_mark_switch *lm)
> +{
> +	logdev_print_hdr(cpu, &lm->hdr);
> +
> +	printk("%s:%d(%d:%d:%d) -->> ",
> +	       lm->prev_comm,
> +	       lm->pid_prev,
> +	       lm->prev_prio,
> +	       lm->prev_static_prio,
> +	       lm->prev_normal_prio);
> +
> +	printk("%s:%d(%d:%d:%d)\n",
> +	       lm->next_comm,
> +	       lm->pid_next,
> +	       lm->next_prio,
> +	       lm->next_static_prio,
> +	       lm->next_normal_prio);
> +}
> +
> +static void logdev_mark_mm_fault_print(int cpu, int size,
> +				       struct logdev_mark_mm_fault *lm)
> +{
> +	logdev_print_hdr(cpu, &lm->hdr);
> +
> +	printk("%s:%d  mm=%p vma=%p  address=%lx  write_access=%d\n",
> +	       lm->comm,
> +	       lm->pid,
> +	       lm->mm,
> +	       lm->vma,
> +	       lm->address,
> +	       lm->write_access);
> +}
> +
> +static void logdev_mark_callback(struct logdev_header *hdr,
> +				 struct logdev_custom *custom,
> +				 int cpu,
> +				 void *rec)
> +{
> +	struct logdev_mark_hdr *lm = rec;
> +
> +	switch (lm->id) {
> +	case LOGMARK_CNTXTSW:
> +		logdev_mark_switch_print(cpu, hdr->size, rec);
> +		break;
> +	case LOGMARK_MM_FAULT:
> +		logdev_mark_mm_fault_print(cpu, hdr->size, rec);
> +		break;
> +	default:
> +		printk("Unknown marker callback id %x\n",
> +		       custom->id);
> +		break;
> +	}
> +}
> +/* ------------------ end cut for user space printing ------------------- */
> +
> +
> +static void __kprobes logmark_hdr(struct logdev_mark_hdr *lm, int id)
> +{
> +	lm->t = sched_clock();
> +	lm->id = id;
> +}
> +
> +
> +void LDCALLER(context_switch) (struct task_struct *prev,
> +			       struct task_struct *next)
> +{
> +	struct logdev_mark_switch lm;
> +
> +	logmark_hdr(&lm.hdr, LOGMARK_CNTXTSW);
> +
> +	lm.pid_prev = prev->pid;
> +	lm.prev_prio = prev->prio;
> +	lm.prev_static_prio = prev->static_prio;
> +	lm.prev_normal_prio = prev->normal_prio;
> +	lm.prev_state = prev->state;
> +	lm.pid_next = next->pid;
> +	lm.next_prio = next->prio;
> +	lm.next_static_prio = next->static_prio;
> +	lm.next_normal_prio = next->normal_prio;
> +	memcpy(lm.prev_comm, prev->comm, TASK_COMM_LEN);
> +	memcpy(lm.next_comm, next->comm, TASK_COMM_LEN);
> +
> +	logdev_record(LOGMARK_ID_MARK, sizeof(lm),
> +		      &lm, sizeof(lm), NULL);
> +}
> +
> +void LDCALLER(mm_fault) (struct mm_struct *mm, struct vm_area_struct *vma,
> +			 unsigned long address, int write_access)
> +{
> +	struct logdev_mark_mm_fault lm;
> +
> +	logmark_hdr(&lm.hdr, LOGMARK_MM_FAULT);
> +
> +	lm.pid = current->pid;
> +	memcpy(lm.comm, current->comm, TASK_COMM_LEN);
> +
> +	lm.mm = mm;
> +	lm.vma = vma;
> +	lm.address = address;
> +	lm.write_access = write_access;
> +
> +	logdev_record(LOGMARK_ID_MARK, sizeof(lm),
> +		      &lm, sizeof(lm), NULL);
> +}
> +
> +/************************ Kprobes ******************************/
> +
> +static int __kprobes logmark_probe(struct kprobe *kp, struct pt_regs *regs)
> +{
> +	struct logdev_mark_probe *p =
> +		container_of(kp, struct logdev_mark_probe, kp);
> +	unsigned long args[MAX_ARGS];
> +	int i;
> +
> +	for (i=0; i < p->args; i++)
> +		args[i] = logdev_mark_get_reg_content(p->arg_regs[i], regs);
> +
> +	switch (p->args) {
> +	case 0:
> +		p->func.func0();
> +		break;
> +	case 1:
> +		p->func.func1(args[0]);
> +		break;
> +	case 2:
> +		p->func.func2(args[0], args[1]);
> +		break;
> +	case 3:
> +		p->func.func3(args[0], args[1], args[2]);
> +		break;
> +	case 4:
> +		p->func.func4(args[0], args[1], args[2], args[3]);
> +		break;
> +
> +	}
> +	return 0;
> +}
> +
> +
> +/************************ User Land ******************************/
> +
> +static DEFINE_MUTEX(logdev_marker_lock);
> +
> +static int mark_is_on;
> +
> +static int logdev_mark_ioctl(struct inode *inode, struct file *filp,
> +			     unsigned int cmd, unsigned long arg)
> +{
> +	int ret = 0;
> +	struct logdev_mark_probe *probe;
> +
> +	switch (cmd) {
> +	case LOGMARK_START:
> +
> +		mutex_lock(&logdev_marker_lock);
> +		if (!mark_is_on) {
> +			list_for_each_entry(probe, &logdev_probes, list) {
> +				probe->kp.pre_handler = logmark_probe;
> +				probe->kp.addr =
> +					(kprobe_opcode_t *)probe->address;
> +
> +				ret = register_kprobe(&probe->kp);
> +				if (ret < 0) {
> +					ret = -EINVAL;
> +					printk(KERN_WARNING
> +					       "logdev_marker: can't register probe\n");
> +					break;
> +				}
> +			}
> +			mark_is_on = 1;
> +		}
> +		mutex_unlock(&logdev_marker_lock);
> +
> +		break;
> +
> +	case LOGMARK_STOP:
> +
> +		mutex_lock(&logdev_marker_lock);
> +		if (mark_is_on) {
> +			list_for_each_entry(probe, &logdev_probes, list) {
> +				unregister_kprobe(&probe->kp);
> +			}
> +			mark_is_on = 0;
> +		}
> +		mutex_unlock(&logdev_marker_lock);
> +
> +		break;
> +
> +	default:
> +		ret = -ENOTTY;
> +	}
> +
> +	return ret;
> +}
> +
> +
> +/******************* List mark entries *****************/
> +
> +static void __kprobes *s_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	struct logdev_mark_probe *p = NULL;
> +	int l = 0;
> +
> +	list_for_each_entry(p, &logdev_probes, list) {
> +		if (l++ >= *pos)
> +			break;
> +	}
> +
> +	(*pos)++;
> +
> +	if (&p->list == &logdev_probes)
> +		return NULL;
> +
> +	return p;
> +}
> +
> +static void __kprobes *s_start(struct seq_file *m, loff_t *pos)
> +	__acquires(logdev_dev.lock)
> +{
> +	struct logdev_mark_probe *p = NULL;
> +	loff_t l = 0;
> +
> +	list_for_each_entry(p, &logdev_probes, list) {
> +		if (l++ >= *pos)
> +			break;
> +	}
> +
> +	if (&p->list == &logdev_probes)
> +		return NULL;
> +
> +	(*pos)++;
> +
> +	return p;
> +}
> +
> +static void __kprobes s_stop(struct seq_file *m, void *p)
> +	__releases(logdev_dev.lock)
> +{
> +}
> +
> +static int __kprobes s_show(struct seq_file *m, void *v)
> +{
> +	struct logdev_mark_probe *lm = v;
> +	int i;
> +
> +	seq_printf(m, "probe address:\t%p\n", (void*)lm->address);
> +	seq_printf(m, "func address:\t%p\n", (void*)lm->func.func0);
> +	seq_printf(m, "args:\t\t%d\n", lm->args);
> +	for (i=0; i < lm->args; i++) {
> +		seq_printf(m, "arg %d reg:\t%s\n",
> +			   i, logdev_reg_to_name(lm->arg_regs[i]));
> +	}
> +	seq_printf(m,"\n");
> +	return 0;
> +}
> +
> +static struct seq_operations logdev_seq_op = {
> +	.start = s_start,
> +	.next = s_next,
> +	.stop = s_stop,
> +	.show = s_show,
> +};
> +
> +/******************* end list kprobes *****************/
> +
> +static int logdev_mark_open (struct inode *inode, struct file *filp)
> +{
> +	int ret;
> +
> +	ret = seq_open(filp, &logdev_seq_op);
> +	if (!ret) {
> +		struct seq_file *m = filp->private_data;
> +		m->private = inode->u.generic_ip;
> +	}
> +
> +	return ret;
> +}
> +
> +
> +static struct file_operations logdev_mark_fops = {
> +	.read		= seq_read,
> +	.ioctl		= logdev_mark_ioctl,
> +	.open		= logdev_mark_open,
> +	.llseek		= seq_lseek,
> +	.release	= seq_release,
> +};
> +
> +/************************ End User Land ******************************/
> +
> +
> +extern unsigned long __logdev_marker_start;
> +extern unsigned long __logdev_marker_end;
> +
> +static int __init logdev_marker_init(void)
> +{
> +	unsigned long *p;
> +	struct logdev_mark_probe *probe;
> +
> +	debugfs_create_file("marker", 0600, logdev_d,
> +			    NULL, &logdev_mark_fops);
> +
> +	logdev_register_callback(LOGMARK_ID_MARK, logdev_mark_callback);
> +
> +	/*
> +	 * Arch must make sure that these are aligned by sizeof(long)
> +	 *
> +	 *  (p is incremented in the loop)
> +	 */
> +	for (p = &__logdev_marker_start; p < &__logdev_marker_end; ) {
> +		int i;
> +
> +		probe = kzalloc(sizeof(*probe), GFP_KERNEL);
> +		if (!probe) {
> +			printk(KERN_WARNING "logdev_marker: ran out of memory!\n");
> +			break;
> +		}
> +		probe->address = *p++;
> +		probe->func.func0 = (logdev_func0_t)*p++;
> +		probe->args = *p++;
> +		if (probe->args > MAX_ARGS) {
> +			printk(KERN_WARNING "logdev_marker: corrupted mark section\n");
> +			kfree(probe);
> +			break;
> +		}
> +		for (i=0; i < probe->args; i++) {
> +			probe->arg_regs[i] = logdev_mark_get_reg(*p++);
> +			if (probe->arg_regs[i] < 0)
> +				break;
> +		}
> +		if (i < probe->args) {
> +			printk(KERN_WARNING "logdev_marker: unknown reg\n");
> +			kfree(probe);
> +			break;
> +		}
> +
> +		list_add(&probe->list, &logdev_probes);
> +	}
> +
> +	return 0;
> +}
> +
> +module_init(logdev_marker_init);
> Index: linux-2.6.18/mm/memory.c
> ===================================================================
> --- linux-2.6.18.orig/mm/memory.c	2006-10-06 21:06:52.000000000 -0400
> +++ linux-2.6.18/mm/memory.c	2006-10-06 21:20:10.000000000 -0400
> @@ -49,6 +49,7 @@
>  #include <linux/module.h>
>  #include <linux/delayacct.h>
>  #include <linux/init.h>
> +#include <linux/logdev_marker.h>
>  
>  #include <asm/pgalloc.h>
>  #include <asm/uaccess.h>
> @@ -2326,6 +2327,8 @@ int __handle_mm_fault(struct mm_struct *
>  	pmd_t *pmd;
>  	pte_t *pte;
>  
> +	LD_MARK4(mm_fault, mm, vma, address, write_access);
> +
>  	__set_current_state(TASK_RUNNING);
>  
>  	count_vm_event(PGFAULT);
> 
> 
OpenPGP public key:              http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint:     8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-10-10 14:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-09-22  0:01 [PATCH] Linux Kernel Markers 0.7 for 2.6.17 (with type checking!) Mathieu Desnoyers
     [not found] ` <451331A1.3020601@goop.org>
2006-09-22  2:11   ` Mathieu Desnoyers
     [not found]     ` <45134539.7070305@goop.org>
2006-09-22  2:24       ` Mathieu Desnoyers
     [not found]         ` <45135FA0.1030403@goop.org>
2006-09-22 15:38           ` Mathieu Desnoyers
2006-09-22  2:49       ` Mathieu Desnoyers
     [not found] ` <1160189237.21768.47.camel@localhost.localdomain>
2006-10-10 14:21   ` Proof of concept: Logdev with "almost-non" intrusive markers Mathieu Desnoyers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).