public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,  take 2
@ 2006-10-16 13:14 Masami Hiramatsu
  2006-10-19  9:00 ` [PATCH 1/5][djprobe] generalize the length of the instruction slots Masami Hiramatsu
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-16 13:14 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar
  Cc: SystemTAP, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

Here is the patch which enables kprobe-booster on
the preemptive kernel.

When we are unregistering a kprobe-booster, we can't release
its buffer immediately on the preemptive kernel, because
some processes might be preempted on the buffer.
The freeze_processes() and thaw_processes() functions can
clean those processes up from the buffer. However, the
processing of those functions takes a long time.
So, this patch introduces the garbage collection mechanism
of insn_slot. It also introduces the "dirty" flag to
free_insn_slot because of efficiency.

The "clean" instruction slots (dirty flag is cleared) are
released immediately. But the "dirty" slots which are used
by boosted kprobes, are marked as garbages.
collect_garbage_slots() will be invoked to release "dirty"
slots if 1) there are more than INSNS_PER_PAGE garbage slots
or 2) there are no unused slots.

Thanks,
-- 
Masami HIRAMATSU
2nd Research Dept.
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


---
 arch/i386/kernel/kprobes.c    |    4 -
 arch/ia64/kernel/kprobes.c    |    2
 arch/powerpc/kernel/kprobes.c |    2
 arch/s390/kernel/kprobes.c    |    2
 arch/x86_64/kernel/kprobes.c  |    2
 include/linux/kprobes.h       |    2
 kernel/kprobes.c              |  101 +++++++++++++++++++++++++++++++++---------
 7 files changed, 87 insertions(+), 28 deletions(-)

Index: linux-2.6.19-rc1-mm1/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/kernel/kprobes.c	2006-10-16 10:40:02.000000000 +0900
+++ linux-2.6.19-rc1-mm1/kernel/kprobes.c	2006-10-16 21:50:44.000000000 +0900
@@ -38,6 +38,7 @@
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
@@ -83,9 +84,12 @@
 	kprobe_opcode_t *insns;		/* Page of instruction slots */
 	char slot_used[INSNS_PER_PAGE];
 	int nused;
+	int ngarbage;
 };

 static struct hlist_head kprobe_insn_pages;
+static int kprobe_garbage_slots;
+static int collect_garbage_slots(void);

 /**
  * get_insn_slot() - Find a slot on an executable page for an instruction.
@@ -96,6 +100,7 @@
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;

+      retry:
 	hlist_for_each(pos, &kprobe_insn_pages) {
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
 		if (kip->nused < INSNS_PER_PAGE) {
@@ -112,7 +117,11 @@
 		}
 	}

-	/* All out of space.  Need to allocate a new page. Use slot 0.*/
+	/* If there are any garbage slots, collect it and try again. */
+	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
+		goto retry;
+	}
+	/* All out of space.  Need to allocate a new page. Use slot 0. */
 	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
 	if (!kip) {
 		return NULL;
@@ -133,10 +142,70 @@
 	memset(kip->slot_used, 0, INSNS_PER_PAGE);
 	kip->slot_used[0] = 1;
 	kip->nused = 1;
+	kip->ngarbage = 0;
 	return kip->insns;
 }

-void __kprobes free_insn_slot(kprobe_opcode_t *slot)
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+{
+	kip->slot_used[idx] = 0;
+	kip->nused--;
+	if (kip->nused == 0) {
+		/*
+		 * Page is no longer in use.  Free it unless
+		 * it's the last one.  We keep the last one
+		 * so as not to have to set it up again the
+		 * next time somebody inserts a probe.
+		 */
+		hlist_del(&kip->hlist);
+		if (hlist_empty(&kprobe_insn_pages)) {
+			INIT_HLIST_NODE(&kip->hlist);
+			hlist_add_head(&kip->hlist,
+				       &kprobe_insn_pages);
+			return 1;
+		} else {
+			module_free(NULL, kip->insns);
+			kfree(kip);
+		}
+	}
+	return 0;
+}
+
+static int __kprobes collect_garbage_slots(void)
+{
+	struct kprobe_insn_page *kip;
+	struct hlist_node *pos, *next;
+	int ret = -1;
+
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+	/* Ensure no-one is preepmted on the garbages */
+	if (freeze_processes() != 0)
+		goto thaw_all;
+#endif
+	hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
+		int i;
+		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
+		if (kip->ngarbage == 0)
+			continue;
+		kip->ngarbage = 0;	/* we will collect all garbages */
+		for (i = 0; i < INSNS_PER_PAGE; i++) {
+			if (kip->slot_used[i] == -1 &&
+			    collect_one_slot(kip, i))
+				goto collected;
+		}
+	}
+      collected:
+	kprobe_garbage_slots = 0;
+	ret = 0;
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
+      thaw_all:
+	thaw_processes();
+#endif
+	return ret;
+}
+
+void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
@@ -146,28 +215,18 @@
 		if (kip->insns <= slot &&
 		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
 			int i = (slot - kip->insns) / MAX_INSN_SIZE;
-			kip->slot_used[i] = 0;
-			kip->nused--;
-			if (kip->nused == 0) {
-				/*
-				 * Page is no longer in use.  Free it unless
-				 * it's the last one.  We keep the last one
-				 * so as not to have to set it up again the
-				 * next time somebody inserts a probe.
-				 */
-				hlist_del(&kip->hlist);
-				if (hlist_empty(&kprobe_insn_pages)) {
-					INIT_HLIST_NODE(&kip->hlist);
-					hlist_add_head(&kip->hlist,
-						&kprobe_insn_pages);
-				} else {
-					module_free(NULL, kip->insns);
-					kfree(kip);
-				}
+			if (dirty) {
+				kip->slot_used[i] = -1;
+				kip->ngarbage++;
+			} else {
+				collect_one_slot(kip, i);
+				break;
 			}
-			return;
 		}
 	}
+	if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) {
+		collect_garbage_slots();
+	}
 }
 #endif

Index: linux-2.6.19-rc1-mm1/arch/i386/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/i386/kernel/kprobes.c	2006-10-16 10:40:00.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/i386/kernel/kprobes.c	2006-10-16 21:43:03.000000000 +0900
@@ -184,7 +184,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
 	mutex_unlock(&kprobe_mutex);
 }

@@ -333,7 +333,7 @@
 		return 1;

 ss_probe:
-#ifndef CONFIG_PREEMPT
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
 	if (p->ainsn.boostable == 1 && !p->post_handler){
 		/* Boost up -- we can execute copied instructions directly */
 		reset_current_kprobe();
Index: linux-2.6.19-rc1-mm1/arch/ia64/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/ia64/kernel/kprobes.c	2006-10-16 10:40:00.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/ia64/kernel/kprobes.c	2006-10-16 10:54:09.000000000 +0900
@@ -481,7 +481,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }
 /*
Index: linux-2.6.19-rc1-mm1/arch/powerpc/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/powerpc/kernel/kprobes.c	2006-10-16 10:40:00.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/powerpc/kernel/kprobes.c	2006-10-16 10:54:09.000000000 +0900
@@ -85,7 +85,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }

Index: linux-2.6.19-rc1-mm1/arch/s390/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/s390/kernel/kprobes.c	2006-10-16 10:40:00.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/s390/kernel/kprobes.c	2006-10-16 10:54:09.000000000 +0900
@@ -200,7 +200,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }

Index: linux-2.6.19-rc1-mm1/arch/x86_64/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/x86_64/kernel/kprobes.c	2006-10-16 10:40:00.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/x86_64/kernel/kprobes.c	2006-10-16 10:54:09.000000000 +0900
@@ -224,7 +224,7 @@
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
 	mutex_lock(&kprobe_mutex);
-	free_insn_slot(p->ainsn.insn);
+	free_insn_slot(p->ainsn.insn, 0);
 	mutex_unlock(&kprobe_mutex);
 }

Index: linux-2.6.19-rc1-mm1/include/linux/kprobes.h
===================================================================
--- linux-2.6.19-rc1-mm1.orig/include/linux/kprobes.h	2006-10-16 10:40:02.000000000 +0900
+++ linux-2.6.19-rc1-mm1/include/linux/kprobes.h	2006-10-16 21:43:07.000000000 +0900
@@ -165,7 +165,7 @@
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
-extern void free_insn_slot(kprobe_opcode_t *slot);
+extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);

 /* Get the kprobe at this addr (if any) - called with preemption disabled */


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/5][djprobe] generalize the length of the instruction slots.
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
@ 2006-10-19  9:00 ` Masami Hiramatsu
  2006-10-19  9:03 ` [PATCH 3/5][djprobe] export set_jmp_op() for sharing Masami Hiramatsu
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:00 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

This patch generalizes get/free_insn_slot() to manage multiple length
instruction slots, because the djprobe needs longer instruction slots
than kprobes.

NOTE: This patch depends on my previous patch which enables
kprobe-booster on the preemptible kernel.
http://sources.redhat.com/ml/systemtap/2006-q4/msg00126.html

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

---
 include/linux/kprobes.h |    6 +++
 kernel/kprobes.c        |   85 ++++++++++++++++++++++++++++++------------------
 2 files changed, 60 insertions(+), 31 deletions(-)

Index: linux-2.6.19-rc1-mm1/include/linux/kprobes.h
===================================================================
--- linux-2.6.19-rc1-mm1.orig/include/linux/kprobes.h	2006-10-16 21:43:07.000000000 +0900
+++ linux-2.6.19-rc1-mm1/include/linux/kprobes.h	2006-10-16 22:10:18.000000000 +0900
@@ -157,6 +157,12 @@
 	struct task_struct *task;
 };

+struct kprobe_insn_page_list {
+	struct hlist_head list;
+	int insn_size;		/* size of an instruction slot */
+	int nr_garbage;
+};
+
 extern spinlock_t kretprobe_lock;
 extern struct mutex kprobe_mutex;
 extern int arch_prepare_kprobe(struct kprobe *p);
Index: linux-2.6.19-rc1-mm1/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/kernel/kprobes.c	2006-10-16 21:50:44.000000000 +0900
+++ linux-2.6.19-rc1-mm1/kernel/kprobes.c	2006-10-16 22:10:39.000000000 +0900
@@ -77,52 +77,60 @@
  * stepping on the instruction on a vmalloced/kmalloced/data page
  * is a recipe for disaster
  */
-#define INSNS_PER_PAGE	(PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+#define INSNS_PER_PAGE(size)	(PAGE_SIZE/(size * sizeof(kprobe_opcode_t)))

 struct kprobe_insn_page {
 	struct hlist_node hlist;
 	kprobe_opcode_t *insns;		/* Page of instruction slots */
-	char slot_used[INSNS_PER_PAGE];
 	int nused;
 	int ngarbage;
+	char slot_used[1];
 };

-static struct hlist_head kprobe_insn_pages;
-static int kprobe_garbage_slots;
-static int collect_garbage_slots(void);
+static struct kprobe_insn_page_list kprobe_insn_pages = {
+	.list = HLIST_HEAD_INIT,
+	.insn_size = MAX_INSN_SIZE,
+	.nr_garbage = 0,
+};
+
+static int collect_garbage_slots(struct kprobe_insn_page_list *plist);

 /**
- * get_insn_slot() - Find a slot on an executable page for an instruction.
+ * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-kprobe_opcode_t __kprobes *get_insn_slot(void)
+kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_page_list *plist)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
+	int max_insn = INSNS_PER_PAGE(plist->insn_size);

       retry:
-	hlist_for_each(pos, &kprobe_insn_pages) {
+	hlist_for_each(pos, &plist->list) {
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
-		if (kip->nused < INSNS_PER_PAGE) {
+		if (kip->nused < max_insn) {
 			int i;
-			for (i = 0; i < INSNS_PER_PAGE; i++) {
+			for (i = 0; i < max_insn; i++) {
 				if (!kip->slot_used[i]) {
 					kip->slot_used[i] = 1;
 					kip->nused++;
-					return kip->insns + (i * MAX_INSN_SIZE);
+					return kip->insns +
+						(i * plist->insn_size);
 				}
 			}
 			/* Surprise!  No unused slots.  Fix kip->nused. */
-			kip->nused = INSNS_PER_PAGE;
+			kip->nused = max_insn;
 		}
 	}

 	/* If there are any garbage slots, collect it and try again. */
-	if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
+	if (plist->nr_garbage > 0 && collect_garbage_slots(plist) == 0) {
 		goto retry;
 	}
 	/* All out of space.  Need to allocate a new page. Use slot 0. */
-	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
+	kip = kmalloc(sizeof(struct kprobe_insn_page) +
+		      sizeof(char) * (max_insn - 1),
+		      GFP_KERNEL);
 	if (!kip) {
 		return NULL;
 	}
@@ -138,8 +146,8 @@
 		return NULL;
 	}
 	INIT_HLIST_NODE(&kip->hlist);
-	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
-	memset(kip->slot_used, 0, INSNS_PER_PAGE);
+	hlist_add_head(&kip->hlist, &plist->list);
+	memset(kip->slot_used, 0, max_insn);
 	kip->slot_used[0] = 1;
 	kip->nused = 1;
 	kip->ngarbage = 0;
@@ -147,7 +155,8 @@
 }

 /* Return 1 if all garbages are collected, otherwise 0. */
-static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static int __kprobes collect_one_slot(struct kprobe_insn_page_list *plist,
+				      struct kprobe_insn_page *kip, int idx)
 {
 	kip->slot_used[idx] = 0;
 	kip->nused--;
@@ -159,10 +168,10 @@
 		 * next time somebody inserts a probe.
 		 */
 		hlist_del(&kip->hlist);
-		if (hlist_empty(&kprobe_insn_pages)) {
+		if (hlist_empty(&plist->list)) {
 			INIT_HLIST_NODE(&kip->hlist);
 			hlist_add_head(&kip->hlist,
-				       &kprobe_insn_pages);
+				       &plist->list);
 			return 1;
 		} else {
 			module_free(NULL, kip->insns);
@@ -172,10 +181,12 @@
 	return 0;
 }

-static int __kprobes collect_garbage_slots(void)
+static int __kprobes
+	collect_garbage_slots(struct kprobe_insn_page_list *plist)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
+	int max_insn = INSNS_PER_PAGE(plist->insn_size);
 	int ret = -1;

 #if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
@@ -183,20 +194,20 @@
 	if (freeze_processes() != 0)
 		goto thaw_all;
 #endif
-	hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
+	hlist_for_each_safe(pos, next, &plist->list) {
 		int i;
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
 		if (kip->ngarbage == 0)
 			continue;
 		kip->ngarbage = 0;	/* we will collect all garbages */
-		for (i = 0; i < INSNS_PER_PAGE; i++) {
+		for (i = 0; i < max_insn; i++) {
 			if (kip->slot_used[i] == -1 &&
-			    collect_one_slot(kip, i))
+			    collect_one_slot(plist, kip, i))
 				goto collected;
 		}
 	}
       collected:
-	kprobe_garbage_slots = 0;
+	plist->nr_garbage = 0;
 	ret = 0;
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
       thaw_all:
@@ -205,29 +216,41 @@
 	return ret;
 }

-void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
+void __kprobes __free_insn_slot(struct kprobe_insn_page_list *plist,
+				kprobe_opcode_t * slot, int dirty)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos;
+	int max_insn = INSNS_PER_PAGE(plist->insn_size);

-	hlist_for_each(pos, &kprobe_insn_pages) {
+	hlist_for_each(pos, &plist->list) {
 		kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
 		if (kip->insns <= slot &&
-		    slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
-			int i = (slot - kip->insns) / MAX_INSN_SIZE;
+		    slot < kip->insns + (max_insn * plist->insn_size)) {
+			int i = (slot - kip->insns) / plist->insn_size;
 			if (dirty) {
 				kip->slot_used[i] = -1;
 				kip->ngarbage++;
 			} else {
-				collect_one_slot(kip, i);
+				collect_one_slot(plist, kip, i);
 				break;
 			}
 		}
 	}
-	if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) {
-		collect_garbage_slots();
+	if (dirty && (++plist->nr_garbage > max_insn)) {
+		collect_garbage_slots(plist);
 	}
 }
+
+kprobe_opcode_t __kprobes *get_insn_slot(void)
+{
+	return __get_insn_slot(&kprobe_insn_pages);
+}
+
+void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
+{
+	__free_insn_slot(&kprobe_insn_pages, slot, dirty);
+}
 #endif

 /* We have preemption disabled.. so it is safe to use __ versions */





^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 3/5][djprobe] export set_jmp_op() for sharing
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
  2006-10-19  9:00 ` [PATCH 1/5][djprobe] generalize the length of the instruction slots Masami Hiramatsu
@ 2006-10-19  9:03 ` Masami Hiramatsu
  2006-10-19  9:03 ` [PATCH 4/5][djprobe] djprobe for i386 architecture code Masami Hiramatsu
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:03 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

The kprobe-booster uses set_jmp_op() function, and the djprobe also
uses it. So this patch exports this function for sharing code.

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

---
 arch/i386/kernel/kprobes.c |   12 ------------
 include/asm-i386/kprobes.h |   12 ++++++++++++
 2 files changed, 12 insertions(+), 12 deletions(-)

Index: linux-2.6.18/arch/i386/kernel/kprobes.c
===================================================================
--- linux-2.6.18.orig/arch/i386/kernel/kprobes.c	2006-09-29 14:40:16.000000000 +0900
+++ linux-2.6.18/arch/i386/kernel/kprobes.c	2006-09-29 14:52:15.000000000 +0900
@@ -41,18 +41,6 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);

-/* insert a jmp code */
-static __always_inline void set_jmp_op(void *from, void *to)
-{
-	struct __arch_jmp_op {
-		char op;
-		long raddr;
-	} __attribute__((packed)) *jop;
-	jop = (struct __arch_jmp_op *)from;
-	jop->raddr = (long)(to) - ((long)(from) + 5);
-	jop->op = RELATIVEJUMP_INSTRUCTION;
-}
-
 /*
  * returns non-zero if opcodes can be boosted.
  */
Index: linux-2.6.18/include/asm-i386/kprobes.h
===================================================================
--- linux-2.6.18.orig/include/asm-i386/kprobes.h	2006-09-29 14:40:09.000000000 +0900
+++ linux-2.6.18/include/asm-i386/kprobes.h	2006-09-29 16:55:52.000000000 +0900
@@ -88,6 +88,18 @@
 		local_irq_enable();
 }

+/* Insert a jmp code */
+static __always_inline void set_jmp_op(void *from, void *to)
+{
+	struct __arch_jmp_op {
+		char op;
+		long raddr;
+	} __attribute__((packed)) *jop;
+	jop = (struct __arch_jmp_op *)from;
+	jop->raddr = (long)(to) - ((long)(from) + 5);
+	jop->op = RELATIVEJUMP_INSTRUCTION;
+}
+
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
 #endif				/* _ASM_KPROBES_H */







^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 4/5][djprobe] djprobe for i386 architecture code
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
  2006-10-19  9:00 ` [PATCH 1/5][djprobe] generalize the length of the instruction slots Masami Hiramatsu
  2006-10-19  9:03 ` [PATCH 3/5][djprobe] export set_jmp_op() for sharing Masami Hiramatsu
@ 2006-10-19  9:03 ` Masami Hiramatsu
  2006-10-19  9:03 ` [PATCH 2/5][djprobe] djprobe core patch Masami Hiramatsu
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:03 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

This patch is i386 architecture dependent part of the djprobe.
I updated the stub code to access stack pointer correctly.

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

 arch/i386/Kconfig               |    8 ++
 arch/i386/kernel/Makefile       |    1
 arch/i386/kernel/djprobe.c      |  143 ++++++++++++++++++++++++++++++++++++++++
 arch/i386/kernel/stub_djprobe.S |   72 ++++++++++++++++++++
 include/asm-i386/djprobe.h      |   65 ++++++++++++++++++
 5 files changed, 289 insertions(+)
Index: linux-2.6.19-rc1-mm1/arch/i386/Kconfig
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/i386/Kconfig	2006-10-16 22:13:44.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/i386/Kconfig	2006-10-16 22:22:34.000000000 +0900
@@ -1185,6 +1185,14 @@
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+config DJPROBE
+        bool "Direct Jump probe (EXPERIMENTAL)"
+	depends on KPROBES && (!PREEMPT || PM)
+	help
+	 Djprobe allows you to dynamically hook at any kernel function
+	 entry points and collect the debugging or performance analysis
+	 information non-disruptively.
 endmenu

 source "arch/i386/Kconfig.debug"
Index: linux-2.6.19-rc1-mm1/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.19-rc1-mm1.orig/arch/i386/kernel/Makefile	2006-10-16 22:13:44.000000000 +0900
+++ linux-2.6.19-rc1-mm1/arch/i386/kernel/Makefile	2006-10-16 22:22:34.000000000 +0900
@@ -29,6 +29,7 @@
 obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
 obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_DJPROBE)		+= stub_djprobe.o djprobe.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-y				+= sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
Index: linux-2.6.19-rc1-mm1/arch/i386/kernel/djprobe.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc1-mm1/arch/i386/kernel/djprobe.c	2006-10-16 22:22:34.000000000 +0900
@@ -0,0 +1,143 @@
+/*
+ *  Kernel Direct Jump Probe (Djprobes)
+ *  arch/i386/kernel/djprobe.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi, Ltd. 2005,2006
+ *
+ * 2005-Aug	Created by Masami HIRAMATSU <masami.hiramatsu.pt@hitachi.com>
+ * 		Initial implementation of Direct jump probe (djprobe)
+ *              to reduce overhead.
+ */
+
+#include <linux/djprobe.h>
+#include <linux/ptrace.h>
+#include <linux/spinlock.h>
+#include <linux/preempt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/desc.h>
+#include <asm/processor.h>
+
+/*
+ * On pentium series, Unsynchronized cross-modifying code
+ * operations can cause unexpected instruction execution results.
+ * So after code modified, we should synchronize it on each processor.
+ */
+static void __local_serialize_cpu(void *info)
+{
+	sync_core();
+}
+
+void arch_serialize_cpus(void)
+{
+	on_each_cpu(__local_serialize_cpu, NULL, 1, 1);
+}
+
+static __always_inline void __codecopy(void *dest, const void *src, size_t size)
+{
+	memcpy(dest, src, size);
+	flush_icache_range((unsigned long)dest, (unsigned long)dest + size);
+}
+
+/* djprobe call back function: called from stub code */
+static void asmlinkage djprobe_callback(struct djprobe_instance *djpi,
+					struct pt_regs *regs)
+{
+	struct djprobe *djp;
+	rcu_read_lock();
+	list_for_each_entry_rcu(djp, &djpi->plist, plist) {
+		if (djp->handler)
+			djp->handler(djp, regs);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Copy post processing instructions
+ * Target instructions MUST be relocatable.
+ */
+int __kprobes arch_prepare_djprobe_instance(struct djprobe_instance *djpi,
+					    struct arch_djprobe_param *param)
+{
+	kprobe_opcode_t *stub;
+	stub = djpi->stub.insn;
+	djpi->stub.size = djprobe_param_length(param);
+
+	/* copy arch-dep-instance from template */
+	memcpy((void *)stub, (void *)&arch_tmpl_stub_entry, ARCH_STUB_SIZE);
+
+	/* set probe information */
+	*((long *)(stub + ARCH_STUB_VAL_IDX)) = (long)djpi;
+	/* set probe function */
+	*((long *)(stub + ARCH_STUB_CALL_IDX)) = (long)djprobe_callback;
+
+	/* copy instructions into the middle of djporbe instance */
+	memcpy((void *)(stub + ARCH_STUB_INST_IDX),
+	       (void *)djprobe_param_address(param), djpi->stub.size);
+
+	/* set returning jmp instruction at the tail of djporbe instance */
+	set_jmp_op(stub + ARCH_STUB_INST_IDX + djpi->stub.size,
+		   djprobe_param_address(param) + djpi->stub.size);
+
+	return 0;
+}
+
+#define INT3_SIZE 1
+#define ADDR_SIZE 4
+#define JMPOP_SIZE 5
+
+void __kprobes arch_install_djprobe_instance(struct djprobe_instance *djpi)
+{
+	long rel =
+	    (long)(djpi->stub.insn) - ((long)(djpi->kp.addr) + JMPOP_SIZE);
+	/* insert the destination address only */
+	__codecopy((void *)((long)djpi->kp.addr + INT3_SIZE), (void *)&rel,
+		   ADDR_SIZE);
+}
+
+void __kprobes arch_post_install_djprobe_instance(struct djprobe_instance *djpi)
+{
+	kprobe_opcode_t op = RELATIVEJUMP_INSTRUCTION;
+	__codecopy((void *)djpi->kp.addr, (void *)&op, sizeof(kprobe_opcode_t));
+}
+
+void __kprobes arch_pre_remove_djprobe_instance(struct djprobe_instance *djpi)
+{
+	/* change (the 1st byte of) jump to int3. */
+	arch_arm_kprobe(&djpi->kp);
+}
+
+void __kprobes arch_remove_djprobe_instance(struct djprobe_instance *djpi)
+{
+	/*
+	 * recover the instructions covered by the destination address.
+	 * the int3 will be removed by unregister_kprobe()
+	 */
+	__codecopy((void *)((long)djpi->kp.addr + INT3_SIZE),
+		   (void *)((long)&djpi->stub.insn[ARCH_STUB_INST_IDX] +
+			    INT3_SIZE), ADDR_SIZE);
+}
+
+/* djprobe handler : switch to a bypass code */
+int __kprobes arch_switch_to_stub(struct djprobe_instance *djpi,
+				  struct pt_regs *regs)
+{
+	regs->eip = (unsigned long)djpi->stub.insn;
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+	return 1;		/* already prepared */
+}
Index: linux-2.6.19-rc1-mm1/arch/i386/kernel/stub_djprobe.S
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc1-mm1/arch/i386/kernel/stub_djprobe.S	2006-10-18 11:37:28.000000000 +0900
@@ -0,0 +1,72 @@
+/*
+ *  linux/arch/i386/stub_djprobe.S
+ *
+ *  Copyright (C) Hitachi, Ltd. 2005,2006
+ *  Created by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ */
+
+#include <linux/autoconf.h>
+
+# jmp into this function from other functions.
+.global arch_tmpl_stub_entry
+arch_tmpl_stub_entry:
+	pushf
+	subl $20, %esp	#skip segment registers.
+	pushl %eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	pushl %ecx
+	pushl %ebx
+
+	movl %esp, %eax
+	pushl %eax
+.global arch_tmpl_stub_val
+arch_tmpl_stub_val:
+	movl $0xffffffff, %eax
+	pushl %eax
+.global arch_tmpl_stub_call
+arch_tmpl_stub_call:
+	movl $0xffffffff, %eax
+	call *%eax
+	addl $8, %esp
+
+	popl %ebx
+	popl %ecx
+	popl %edx
+	popl %esi
+	popl %edi
+	popl %ebp
+	popl %eax
+	addl $20, %esp
+	popf
+.global arch_tmpl_stub_inst
+arch_tmpl_stub_inst:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+.global arch_tmpl_stub_end
+arch_tmpl_stub_end:
Index: linux-2.6.19-rc1-mm1/include/asm-i386/djprobe.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc1-mm1/include/asm-i386/djprobe.h	2006-10-16 22:22:34.000000000 +0900
@@ -0,0 +1,65 @@
+#ifndef _ASM_DJPROBE_H
+#define _ASM_DJPROBE_H
+/*
+ *  Kernel Direct Jump Probe (Djprobe)
+ *  include/asm-i386/djprobe.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi, Ltd. 2005,2006
+ *
+ * 2005-Aug	Created by Masami HIRAMATSU <masami.hiramatsu.pt@hitachi.com>
+ * 		Initial implementation of Direct jump probe (djprobe)
+ *              to reduce overhead.
+ */
+
+#include <asm/kprobes.h>
+
+/* stub template code */
+extern kprobe_opcode_t arch_tmpl_stub_entry;
+extern kprobe_opcode_t arch_tmpl_stub_val;
+extern kprobe_opcode_t arch_tmpl_stub_call;
+extern kprobe_opcode_t arch_tmpl_stub_inst;
+extern kprobe_opcode_t arch_tmpl_stub_end;
+
+#define ARCH_STUB_VAL_IDX \
+	((long)&arch_tmpl_stub_val - (long)&arch_tmpl_stub_entry + 1)
+#define ARCH_STUB_CALL_IDX \
+	((long)&arch_tmpl_stub_call - (long)&arch_tmpl_stub_entry + 1)
+#define ARCH_STUB_INST_IDX \
+	((long)&arch_tmpl_stub_inst - (long)&arch_tmpl_stub_entry)
+#define ARCH_STUB_SIZE \
+	(((long)&arch_tmpl_stub_end - \
+	  (long)&arch_tmpl_stub_entry)/sizeof(kprobe_opcode_t))
+
+#define ARCH_STUB_INSN_MAX 20
+#define ARCH_STUB_INSN_MIN 5
+
+struct arch_djprobe_stub {
+	kprobe_opcode_t *insn;
+	int size;
+};
+
+struct arch_djprobe_param {
+	kprobe_opcode_t * addr;
+	int size;
+};
+
+#define djprobe_param_address(p) ((p)->addr)
+#define djprobe_param_length(p) ((p)->size)
+
+#define DJPI_ARCH_SIZE(djpi) ((djpi)->stub.size)
+
+#endif				/* _ASM_DJPROBE_H */





^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/5][djprobe] djprobe core patch
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
                   ` (2 preceding siblings ...)
  2006-10-19  9:03 ` [PATCH 4/5][djprobe] djprobe for i386 architecture code Masami Hiramatsu
@ 2006-10-19  9:03 ` Masami Hiramatsu
  2006-10-27 23:34   ` Keshavamurthy, Anil S
  2006-10-19  9:04 ` [PATCH 5/5][djprobe] delayed invoking commit_djprobes() Masami Hiramatsu
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:03 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

This patch is the architecture independent part of the djprobe.
In this patch, both of jump inserting and buffer removing are
executed by the commit_djprobes() batch function. This batch
function freezes processes while inserting jumps and removing
buffers on the preemptible kernel. So we can do it safely.
However, the handlers of djprobes are activated/deactivated right
after calling register/unregister_djprobe() functions. Before calling
commit_djprobes(), these handlers are invoked by normal kprobes.

I also updated API reference.

API Reference
-------------
The Djprobe API includes "register_djprobe" function,
"unregister_djprobe" function and "commit_djprobes" function.
Here are specifications for these functions and the
associated probe handlers.

1. register_djprobe

#include <linux/djprobe.h>
int register_djprobe(struct djprobe *djp);

Registers djprobe at the specified address.
When the kernel hits the address, Djprobe calls djp->handler.

register_djprobe() returns 0 on success, or a negative errno otherwise.

User's probe handler (djp->handler):
#include <linux/djprobe.h>
#include <linux/ptrace.h>
void handler(struct djprobe *djp, struct pt_regs *regs);

Called with p pointing to the djprobe associated with the probe point,
and regs pointing to the struct containing the registers saved when
the probe point was hit.

2. unregister_djprobe

#include <linux/djprobe.h>
void unregister_djprobe(struct djprobe *djp);

Removes the specified probe handler.  The unregister function can
be called at anytime after the probe has been registered.

3. commit_djprobes

#include <linux/djprobe.h>
int commit_djprobes(void);

Inserts jump instruction of registered probes and releases buffers
of unregistered probes. The commit function can be called at anytime.

commit_djprobes() returns 0 on success, or a negative errno otherwise.

-----

Thanks,

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

---
 include/linux/djprobe.h |   89 ++++++++++++++
 include/linux/kprobes.h |    5
 kernel/Makefile         |    1
 kernel/djprobe.c        |  303 ++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/kprobes.c        |   18 ++
 5 files changed, 415 insertions(+), 1 deletion(-)
Index: linux-2.6.19-rc1-mm1/include/linux/djprobe.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc1-mm1/include/linux/djprobe.h	2006-10-16 22:06:22.000000000 +0900
@@ -0,0 +1,89 @@
+#ifndef _LINUX_DJPROBE_H
+#define _LINUX_DJPROBE_H
+/*
+ *  Kernel Direct Jump Probe (Djprobe)
+ *  include/linux/djprobe.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi, Ltd. 2005,2006
+ *
+ * 2005-Aug	Created by Masami HIRAMATSU <masami.hiramatsu.pt@hitachi.com>
+ * 		Initial implementation of Direct jump probe (djprobe)
+ *              to reduce overhead.
+ */
+#include <linux/autoconf.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+#include <linux/kprobes.h>
+#include <asm/djprobe.h>
+
+struct djprobe;
+/* djprobe's instance (internal use)*/
+struct djprobe_instance {
+	struct list_head plist; /* list of djprobes for multiprobe support */
+	struct arch_djprobe_stub stub;
+	struct kprobe kp;
+	struct hlist_node hlist; /* list of djprobe_instances */
+	struct list_head rlist;
+};
+
+struct djprobe;
+typedef void (*djprobe_handler_t) (struct djprobe *, struct pt_regs *);
+/*
+ * Direct Jump probe interface structure
+ */
+struct djprobe {
+	struct list_head plist;		/* list of djprobes */
+	djprobe_handler_t handler;	/* probing handler (pre-executed) */
+	struct djprobe_instance *inst;	/* pointer for instance */
+	struct arch_djprobe_param param;	/* arch dependent parameter */
+};
+
+#ifdef CONFIG_DJPROBE
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+extern struct mutex djprobe_mutex;
+struct djprobe_instance * __get_djprobe_instance(void *addr, int size);
+
+/* architecture dependent functions */
+extern int arch_prepare_djprobe_instance(struct djprobe_instance *djpi,
+					 struct arch_djprobe_param *param);
+extern void arch_install_djprobe_instance(struct djprobe_instance *djpi);
+extern void arch_post_install_djprobe_instance(struct djprobe_instance *djpi);
+extern void arch_pre_remove_djprobe_instance(struct djprobe_instance *djpi);
+extern void arch_remove_djprobe_instance(struct djprobe_instance *djpi);
+extern void arch_serialize_cpus(void);
+extern int arch_switch_to_stub(struct djprobe_instance *djpi,
+			       struct pt_regs * regs);
+extern int djprobe_kprobe(struct kprobe *kp);
+
+/* user interfaces */
+int register_djprobe(struct djprobe *p);
+void unregister_djprobe(struct djprobe *p);
+int commit_djprobes(void);
+#else /* CONFIG_DJPROBE */
+static inline int register_djprobe(struct djprobe *p)
+{
+	return -ENOSYS;
+}
+static inline int commit_djprobes(void)
+{
+	return -ENOSYS;
+}
+static inline void unregister_djprobe(struct djprobe *p)
+{
+}
+#endif				/* CONFIG_DJPROBE */
+#endif				/* _LINUX_DJPROBE_H */
Index: linux-2.6.19-rc1-mm1/include/linux/kprobes.h
===================================================================
--- linux-2.6.19-rc1-mm1.orig/include/linux/kprobes.h	2006-10-16 22:04:37.000000000 +0900
+++ linux-2.6.19-rc1-mm1/include/linux/kprobes.h	2006-10-16 22:06:22.000000000 +0900
@@ -173,6 +173,11 @@
 extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_page_list *pages);
+extern void __free_insn_slot(struct kprobe_insn_page_list *pages,
+			     kprobe_opcode_t * slot, int dirty);
+extern int in_kprobes_functions(unsigned long addr);
+

 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
Index: linux-2.6.19-rc1-mm1/kernel/Makefile
===================================================================
--- linux-2.6.19-rc1-mm1.orig/kernel/Makefile	2006-10-16 21:43:03.000000000 +0900
+++ linux-2.6.19-rc1-mm1/kernel/Makefile	2006-10-16 22:06:22.000000000 +0900
@@ -42,6 +42,7 @@
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_DJPROBE) += djprobe.o
 obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
Index: linux-2.6.19-rc1-mm1/kernel/djprobe.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc1-mm1/kernel/djprobe.c	2006-10-16 22:06:22.000000000 +0900
@@ -0,0 +1,303 @@
+/*
+ *  Kernel Direct Jump Probe (Djprobe)
+ *  kernel/djprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi, Ltd. 2005,2006
+ *
+ * 2005-Aug	Created by Masami HIRAMATSU <masami.hiramatsu.pt@hitachi.com>
+ * 		Initial implementation of Direct jump probe (djprobe)
+ *              to reduce overhead.
+ * 2006-Sep	add preemptive kernel support.
+ */
+#include <linux/djprobe.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
+#include <linux/sched.h>
+#include <asm-generic/sections.h>
+#include <asm/errno.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+
+/*
+ * The djprobe do not refer instances list when probe function called.
+ * This list is operated on registering and unregistering djprobe.
+ */
+#define DJPROBE_BLOCK_BITS 6
+#define DJPROBE_BLOCK_SIZE (1 << DJPROBE_BLOCK_BITS)
+#define DJPROBE_HASH_BITS 8
+#define DJPROBE_TABLE_SIZE (1 << DJPROBE_HASH_BITS)
+#define DJPROBE_TABLE_MASK (DJPROBE_TABLE_SIZE - 1)
+
+/* djprobe instance hash table */
+static struct hlist_head djprobe_inst_table[DJPROBE_TABLE_SIZE];
+
+#define hash_djprobe(key) \
+	(((unsigned long)(key) >> DJPROBE_BLOCK_BITS) & DJPROBE_TABLE_MASK)
+
+DEFINE_MUTEX(djprobe_mutex);
+
+/* Instruction pages for djprobe's stub code */
+static struct kprobe_insn_page_list djprobe_insn_pages = {
+	.list = HLIST_HEAD_INIT,
+	.insn_size = 0,
+	.nr_garbage = 0
+};
+
+static LIST_HEAD(registering_list);
+static LIST_HEAD(unregistering_list);
+
+static int __djprobe_pre_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+	struct djprobe_instance *djpi =
+	    container_of(kp, struct djprobe_instance, kp);
+
+	return arch_switch_to_stub(djpi, regs);
+}
+
+int __kprobes djprobe_kprobe(struct kprobe *kp)
+{
+	return kp->pre_handler == __djprobe_pre_handler;
+}
+
+static void __free_djprobe_instance(struct djprobe_instance *djpi)
+{
+	hlist_del(&djpi->hlist);
+	if (djpi->kp.addr) {
+		unregister_kprobe(&(djpi->kp));	/* including safety check */
+	}
+	if (djpi->stub.insn)
+		__free_insn_slot(&djprobe_insn_pages, djpi->stub.insn, 0);
+	kfree(djpi);
+}
+
+static __always_inline
+    struct djprobe_instance *__create_djprobe_instance(struct arch_djprobe_param
+						       *param)
+{
+	struct djprobe_instance *djpi;
+	void * addr = (void *)djprobe_param_address(param);
+	/* allocate a new instance */
+	djpi = kcalloc(1, sizeof(struct djprobe_instance), GFP_ATOMIC);
+	if (djpi == NULL) {
+		goto out;
+	}
+
+	/* allocate stub */
+	djpi->stub.insn = __get_insn_slot(&djprobe_insn_pages);
+	if (djpi->stub.insn == NULL) {
+		__free_djprobe_instance(djpi);
+		djpi = NULL;
+		goto out;
+	}
+
+	arch_prepare_djprobe_instance(djpi, param);
+
+	INIT_LIST_HEAD(&djpi->plist);
+	INIT_LIST_HEAD(&djpi->rlist);
+	INIT_HLIST_NODE(&djpi->hlist);
+	hlist_add_head(&djpi->hlist,
+		       &djprobe_inst_table[hash_djprobe(addr)]);
+
+	/* register as a kprobe */
+	djpi->kp.addr = addr;
+	djpi->kp.pre_handler = __djprobe_pre_handler;
+	if (register_kprobe(&djpi->kp) < 0) {
+		djpi->kp.addr = NULL;
+		__free_djprobe_instance(djpi);
+		djpi = NULL;
+	} else {
+		list_add(&djpi->rlist, &registering_list);
+	}
+
+      out:
+	return djpi;
+}
+
+struct djprobe_instance *__kprobes __get_djprobe_instance(void *addr, int size)
+{
+	struct djprobe_instance *djpi;
+	struct hlist_node *node;
+	unsigned long idx, eidx;
+
+	idx = hash_djprobe(addr - ARCH_STUB_INSN_MAX);
+	eidx = ((hash_djprobe(addr + size) + 1) & DJPROBE_TABLE_MASK);
+	do {
+		hlist_for_each_entry(djpi, node, &djprobe_inst_table[idx],
+				     hlist) {
+			if (((long)addr <
+			     (long)djpi->kp.addr + DJPI_ARCH_SIZE(djpi))
+			    && ((long)djpi->kp.addr < (long)addr + size)) {
+				return djpi;
+			}
+		}
+		idx = ((idx + 1) & DJPROBE_TABLE_MASK);
+	} while (idx != eidx);
+
+	return NULL;
+}
+
+/* make sure calling with locking djprobe_mutex */
+static int __commit_djprobes(void)
+{
+	struct djprobe_instance *djpi;
+	int ret = -EAGAIN;
+
+	if (list_empty(&registering_list) &&
+	    list_empty(&unregistering_list)) {
+		return 0;
+	}
+
+#ifdef CONFIG_PREEMPT
+	if (freeze_processes() != 0)
+		goto out;
+#endif
+	/* remove jump code */
+	if (!list_empty(&unregistering_list)) {
+		list_for_each_entry(djpi, &unregistering_list, rlist) {
+			arch_remove_djprobe_instance(djpi);
+		}
+		arch_serialize_cpus();
+	}
+
+	/* ensure safety */
+	synchronize_sched();
+
+	/* inserting jump code */
+	if (!list_empty(&registering_list)) {
+		list_for_each_entry(djpi, &registering_list, rlist) {
+			arch_install_djprobe_instance(djpi);
+		}
+		arch_serialize_cpus();
+		while (!list_empty(&registering_list)) {
+			djpi = list_entry(registering_list.next,
+					  struct djprobe_instance, rlist);
+			list_del_init(&djpi->rlist);
+			arch_post_install_djprobe_instance(djpi);
+		}
+	}
+	/* release code buffer */
+	while (!list_empty(&unregistering_list)) {
+		djpi = list_entry(unregistering_list.next,
+				  struct djprobe_instance, rlist);
+		list_del(&djpi->rlist);
+		__free_djprobe_instance(djpi);
+	}
+	ret = 0;
+
+#ifdef CONFIG_PREEMPT
+      out:
+	thaw_processes();
+#endif
+	return ret;
+}
+
+int __kprobes commit_djprobes(void)
+{
+	int ret;
+	BUG_ON(in_interrupt());
+	mutex_lock(&djprobe_mutex);
+	ret = __commit_djprobes();
+	mutex_unlock(&djprobe_mutex);
+	return ret;
+}
+
+int __kprobes register_djprobe(struct djprobe *djp)
+{
+	struct djprobe_instance *djpi;
+	int ret = 0, i;
+	void * addr = (void *)djprobe_param_address(&djp->param);
+	unsigned long len = (unsigned long)djprobe_param_length(&djp->param);
+
+	BUG_ON(in_interrupt());
+
+	if (len > ARCH_STUB_INSN_MAX || len < ARCH_STUB_INSN_MIN)
+		return -EINVAL;
+
+	if ((ret = in_kprobes_functions((long)addr)) != 0)
+		return ret;
+
+	mutex_lock(&djprobe_mutex);
+
+	/* check confliction with other djprobes */
+	djpi = __get_djprobe_instance(addr, len);
+	if (djpi) {
+		if (djpi->kp.addr == addr) {
+			djp->inst = djpi;	/* add to another instance */
+			list_add_rcu(&djp->plist, &djpi->plist);
+		} else {
+			ret = -EEXIST;	/* other djprobes were inserted */
+		}
+		goto out;
+	}
+	/* check confliction with kprobes */
+	for (i = 1; i < len; i++) {
+		if (get_kprobe((void *)((long)addr + i))) {
+			ret = -EEXIST;	/* a kprobes were inserted */
+			goto out;
+		}
+	}
+
+	djpi = __create_djprobe_instance(&djp->param);
+	if (djpi == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* attach */
+	djp->inst = djpi;
+	INIT_LIST_HEAD(&djp->plist);
+	list_add_rcu(&djp->plist, &djpi->plist);
+
+      out:
+	mutex_unlock(&djprobe_mutex);
+	return ret;
+}
+
+void __kprobes unregister_djprobe(struct djprobe *djp)
+{
+	struct djprobe_instance *djpi;
+
+	BUG_ON(in_interrupt());
+
+	mutex_lock(&djprobe_mutex);
+	djpi = djp->inst;
+	list_del_rcu(&djp->plist);
+	djp->inst = NULL;
+	if (list_empty(&djpi->plist)) {
+		if (!list_empty(&djpi->rlist)) /* this is not committed yet */
+			list_del_init(&djpi->rlist);
+		arch_pre_remove_djprobe_instance(djpi);
+		list_add(&djpi->rlist, &unregistering_list);
+	}
+	mutex_unlock(&djprobe_mutex);
+}
+
+static int __init init_djprobe(void)
+{
+	djprobe_insn_pages.insn_size = ARCH_STUB_SIZE;
+	return 0;
+}
+
+__initcall(init_djprobe);
+
+EXPORT_SYMBOL_GPL(commit_djprobes);
+EXPORT_SYMBOL_GPL(register_djprobe);
+EXPORT_SYMBOL_GPL(unregister_djprobe);
Index: linux-2.6.19-rc1-mm1/kernel/kprobes.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/kernel/kprobes.c	2006-10-16 22:04:22.000000000 +0900
+++ linux-2.6.19-rc1-mm1/kernel/kprobes.c	2006-10-16 22:06:22.000000000 +0900
@@ -39,6 +39,7 @@
 #include <linux/moduleloader.h>
 #include <linux/kallsyms.h>
 #include <linux/sched.h>
+#include <linux/djprobe.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
@@ -532,7 +533,7 @@
 	return ret;
 }

-static int __kprobes in_kprobes_functions(unsigned long addr)
+int __kprobes in_kprobes_functions(unsigned long addr)
 {
 	if (addr >= (unsigned long)__kprobes_text_start
 		&& addr < (unsigned long)__kprobes_text_end)
@@ -582,6 +583,16 @@
 			probed_mod = NULL;
 	}

+#ifdef CONFIG_DJPROBE
+	if (!djprobe_kprobe(p)) {
+		mutex_lock(&djprobe_mutex);
+		if (__get_djprobe_instance(p->addr, 1) != NULL) {
+			mutex_unlock(&djprobe_mutex);
+			return -EEXIST;
+		}
+	}
+#endif /* CONFIG_DJPROBE */
+
 	p->nmissed = 0;
 	mutex_lock(&kprobe_mutex);
 	old_p = get_kprobe(p->addr);
@@ -608,6 +619,11 @@
 out:
 	mutex_unlock(&kprobe_mutex);

+#ifdef CONFIG_DJPROBE
+	if (!djprobe_kprobe(p))
+		mutex_unlock(&djprobe_mutex);
+#endif /* CONFIG_DJPROBE */
+
 	if (ret && probed_mod)
 		module_put(probed_mod);
 	return ret;







^ permalink raw reply	[flat|nested] 20+ messages in thread

* [RFC][djprobe] djprobe examples
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
                   ` (4 preceding siblings ...)
  2006-10-19  9:04 ` [PATCH 5/5][djprobe] delayed invoking commit_djprobes() Masami Hiramatsu
@ 2006-10-19  9:04 ` Masami Hiramatsu
  2006-10-30  6:37 ` [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 bibo,mao
  6 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:04 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

[-- Attachment #1: Type: text/plain, Size: 2182 bytes --]

Hi,

Here are an example module of djprobe and a simple helper script.

NOTE:
Currently, this helper script can ONLY measure the *LENGTH* of the
instruction-block which will be overwritten by a jump code. It can
*NOT* check whether this instruction-block can be executed out of
line and no branch jumps into the target area.
However, now we're developing more useful helper tool which can
check it.

Here is the example of usage;
1) Analyze the kernel code by using the helper script.

$ ./disym.sh sys_symlink
sys_symlink
0xc017bbe0

/lib/modules/2.6.19-rc1-mm1/build/vmlinux:     file format elf32-i386

Disassembly of section .text:

c017bbe0 <sys_symlink>:
c017bbe0:       83 ec 0c                sub    $0xc,%esp
c017bbe3:       8b 44 24 14             mov    0x14(%esp),%eax

Please be sure that the above-disassembled instructions are relocatable.
Parameter: addr=0xc017bbe0 size=7


2) If the instructions can be executed out of line (ex. load/store,
 compare, add/sub, etc.) and no branch jumps into it (you can dump whole
 of the function by using disym.sh with '-a' option),
 Install the example module with the above parameters.

$ sudo /sbin/insmod ./djprobe_ex.ko addr=0xc017bbe0 size=7


3) and test it.

$ ln -s hoge huga
$ dmesg | tail -n 4
probe install at c017bbe0, size 7
Stopping tasks: =======================================|
Restarting tasks... done
probe call:c017bbe0, caller:c01030c5

$ rm huga
$ ln -s hoge huga
$ dmesg | tail -n 5
probe install at c017bbe0, size 7
Stopping tasks: =======================================|
Restarting tasks... done
probe call:c017bbe0, caller:c01030c5
probe call:c017bbe0, caller:c01030c5

4) Finally, remove the module.

$ sudo /sbin/rmmod djprobe_ex.ko
$ dmesg | tail -n 8
probe install at c017bbe0, size 7
Stopping tasks: =======================================|
Restarting tasks... done
probe call:c017bbe0, caller:c01030c5
probe call:c017bbe0, caller:c01030c5
probe uninstall at c017bbe0
Stopping tasks: =======================================|
Restarting tasks... done


Thanks,
-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com






[-- Attachment #2: disym.sh --]
[-- Type: text/plain, Size: 1743 bytes --]

#!/bin/sh
# Copyright (C) HITACHI, Ltd. 2005
# Created by M.Hiramatsu <hiramatu@sdl.hitachi.co.jp>

[ $# -gt 3 -o $# -lt 1 ] && echo "usage: disym.sh [-a] <kernel_symbol> [kernel-version]" && exit 0

DISALL=0
if [ $1 = "-a" ] ;then
DISALL=1
shift 1
fi

SYM=$1
KVER=$2
[ -z "$KVER" ] && KVER=`uname -r`

function cntarg () {
return $#
}

SYSMAP=/lib/modules/$KVER/build/System.map
[ -f $SYSMAP ] || SYSMAP=/boot/System.map-`uname -r`
[ -f $SYSMAP ] || SYSMAP=/proc/kallsyms

VMLINUX=/lib/modules/$KVER/build/vmlinux
[ -f $VMLINUX ] || VMLINUX=/boot/vmlinux-`uname -r`
[ -f $VMLINUX ] || VMLINUX=/usr/lib/debug/lib/modules/$KVER/vmlinux

setaddrs () {
XADDR=$1
XEADDR=$2
}

echo $SYM
case $SYM in
	0x*)
	XADDR=$SYM
	SADDR=`printf "%d" $SYM`
	EADDR=`expr $SADDR + 5`
	;;
	*)
	if [ $DISALL -eq 1 ] ;then
	setaddrs `sort $SYSMAP | grep -A1 " $SYM"$  | cut -f 1 -d\ `
	if [ -z "$XADDR" ] ; then 
		echo "Error : $SYM was not found in "$SYSMAP
		exit 0;
	fi
	XADDR=0x$XADDR
	XEADDR=0x$XEADDR
	SADDR=`printf "%d" $XADDR` 
	EADDR=`printf "%d" $XEADDR` 
	else
	XADDR=0x`grep " $SYM"$ $SYSMAP | cut -f 1 -d\ `
	if [ "$XADDR" = "0x" ] ; then 
		echo "Error : $SYM was not found in "$SYSMAP
		exit 0;
	fi
	SADDR=`printf "%d" $XADDR` 
	EADDR=`expr $SADDR + 5`
	fi
	;;
esac
echo $XADDR

objdump -w --start-address=$SADDR --stop-address=$EADDR -j ".text" -d $VMLINUX
echo 
LLINE=`objdump -w --start-address=$SADDR --stop-address=$EADDR -j ".text" -d $VMLINUX | tail -n 1 | sed s/"	"/\:/g`
EXADDR=`echo $LLINE | cut -f 1 -d:`
cntarg `echo $LLINE | cut -f 3 -d:`
DIFF=$?
EADDR=`printf "%d" 0x$EXADDR`
SIZE=`expr $EADDR - $SADDR + $DIFF`
echo "Please be sure that the above-disassembled instructions are relocatable."
echo "Parameter: addr=$XADDR size=$SIZE"




[-- Attachment #3: djprobe_ex.c --]
[-- Type: text/plain, Size: 2174 bytes --]

/* 
 djprobe_ex.c -- Direct Jump Probe Example
 Copyright (c) 2005,2006 Hitachi,Ltd.,
 Created by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/djprobe.h>
#include <linux/rcupdate.h>

static long addr=0;
module_param(addr, long, 0444);
static long size=0;
module_param(size, long, 0444);
static long show_arg=0;
module_param(show_arg, long, 0444);

#define CALLER(regs) (((unsigned long *)&regs->esp)[0])
#define ARG(n,regs) (((unsigned long *)&regs->esp)[n]) /*arg1: ARG(1,stadr)*/

static void probe_func(struct djprobe *djp, struct pt_regs *regs)
{
	int i;
	printk("probe call:%p, caller:%lx", 
	       (void*)djp->inst->kp.addr, CALLER(regs));
	for (i = 1; i <= show_arg; i++) {
		printk(" arg[%d]:%lx", i, ARG(i, regs));
	}
	printk("\n");
}

static struct djprobe djp = {0};

static int install_probe(void) 
{
	if (addr == 0 || size < 5 || size > 16 ) {
		return -1;
	}
	printk("probe install at %p, size %ld\n", (void*)addr, size);

	djp.handler = probe_func;
	djprobe_param_address(&djp.param) = (void *)addr;
	djprobe_param_length(&djp.param) = size;
	if (register_djprobe(&djp) != 0) return -1;
	
	return 0;
}

static void uninstall_probe(void)
{
	unregister_djprobe(&djp);
	printk("probe uninstall at %p\n", (void*)addr);
}

module_init(install_probe);
module_exit(uninstall_probe);
MODULE_AUTHOR("M.Hiramatsu <masami.hiramatsu.pt@hitachi.com>");
MODULE_LICENSE("GPL");





^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 5/5][djprobe] delayed invoking commit_djprobes()
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
                   ` (3 preceding siblings ...)
  2006-10-19  9:03 ` [PATCH 2/5][djprobe] djprobe core patch Masami Hiramatsu
@ 2006-10-19  9:04 ` Masami Hiramatsu
  2006-10-19  9:04 ` [RFC][djprobe] djprobe examples Masami Hiramatsu
  2006-10-30  6:37 ` [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 bibo,mao
  6 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-19  9:04 UTC (permalink / raw)
  To: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP
  Cc: Masami Hiramatsu, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi,

This patch invokes commit_djprobes() function from a worker.
This worker is scheduled automatically by register/
unregister_djprobe() functions and is executed in 0.1 second.
If the worker fails freezing processes, it will try again
after 0.1 second.

Thanks,
-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com

---
 kernel/djprobe.c |   23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

Index: linux-2.6.19-rc1-mm1/kernel/djprobe.c
===================================================================
--- linux-2.6.19-rc1-mm1.orig/kernel/djprobe.c	2006-10-16 22:06:22.000000000 +0900
+++ linux-2.6.19-rc1-mm1/kernel/djprobe.c	2006-10-16 22:06:44.000000000 +0900
@@ -64,6 +64,15 @@
 static LIST_HEAD(registering_list);
 static LIST_HEAD(unregistering_list);

+static void commit_work_fn(void * data);
+static DECLARE_WORK(commit_work, commit_work_fn, 0);
+#define DJPROBE_COMMIT_DELAY (HZ/10)
+
+static __always_inline void kick_delayed_commit(void)
+{
+	schedule_delayed_work(&commit_work, DJPROBE_COMMIT_DELAY);
+}
+
 static int __djprobe_pre_handler(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct djprobe_instance *djpi =
@@ -211,12 +220,18 @@

 int __kprobes commit_djprobes(void)
 {
-	int ret;
+	int ret = 0;
 	BUG_ON(in_interrupt());
+	commit_work_fn(NULL);
+	return ret;
+}
+
+static void commit_work_fn(void * data)
+{
 	mutex_lock(&djprobe_mutex);
-	ret = __commit_djprobes();
+	if (__commit_djprobes() < 0)
+		kick_delayed_commit();/* try again */
 	mutex_unlock(&djprobe_mutex);
-	return ret;
 }

 int __kprobes register_djprobe(struct djprobe *djp)
@@ -265,6 +280,7 @@
 	djp->inst = djpi;
 	INIT_LIST_HEAD(&djp->plist);
 	list_add_rcu(&djp->plist, &djpi->plist);
+	kick_delayed_commit();

       out:
 	mutex_unlock(&djprobe_mutex);
@@ -286,6 +302,7 @@
 			list_del_init(&djpi->rlist);
 		arch_pre_remove_djprobe_instance(djpi);
 		list_add(&djpi->rlist, &unregistering_list);
+		kick_delayed_commit();
 	}
 	mutex_unlock(&djprobe_mutex);
 }






^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5][djprobe] djprobe core patch
  2006-10-19  9:03 ` [PATCH 2/5][djprobe] djprobe core patch Masami Hiramatsu
@ 2006-10-27 23:34   ` Keshavamurthy, Anil S
  2006-10-30 14:07     ` Masami Hiramatsu
  0 siblings, 1 reply; 20+ messages in thread
From: Keshavamurthy, Anil S @ 2006-10-27 23:34 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP, Satoshi Oshima,
	Hideo Aoki, Yumiko Sugita

Hi Masami,
	Sorry for the delayed response. My comments are inline.

On Thu, Oct 19, 2006 at 06:02:32PM +0900, Masami Hiramatsu wrote:
> I also updated API reference.
> 
> API Reference
> -------------
> The Djprobe API includes "register_djprobe" function,
> "unregister_djprobe" function and "commit_djprobes" function.
> Here are specifications for these functions and the
> associated probe handlers.

I am curious as to why you are introducing a new interface
for registering djprobes. Can't this be done by modifying the
existing register_kprobe()/unregister_kprobe() and under the
covers you can choose to implement djprobes. 

The benifit of hiding djprobes under kprobes would be 

1)Users of kernel probe need not have to maintain two
version of their instrumentation code (one with kprobes 
and one with djprobes).

2)If for some reason djprobes fails (might be  because there exists
a kprobes in that djprobe address + size range), then the user of the
probe interface has to try kprobes in order to succeed the registration.
However if you choose to implement djprobes under the covers of kprobes, then you can
transparently support the insertion of the probes, through aggregate kprobes instead
of failing the djprobes.

Also with this approach, today's systemtap can easily take advantage of djprobes,
since djprobes is implemented under the kprobes implementation. Since you are targeting
your code for mainline, don't worry about kabi of kprobes interface.

Oaky, some more comments.
1) I did not see where you check for whether the 
target instruction of the djprobe is relocatable.
Are you currently assuming that the target instruction is simply relocatable?

2) You are not fully populating the pt_regs which the probe handler receives,
( you have bogus values in eflags, eip, esp, etc) so I am not sure whether 
this is a must for instrumentation code who might need these values.

[...]	
> +int __kprobes djprobe_kprobe(struct kprobe *kp)
Did not see where you are using this function.
> +{
> +	return kp->pre_handler == __djprobe_pre_handler;
> +}
[...]

> +
> +static __always_inline
> +    struct djprobe_instance *__create_djprobe_instance(struct arch_djprobe_param
> +						       *param)
> +{
> +	struct djprobe_instance *djpi;
> +	void * addr = (void *)djprobe_param_address(param);
> +	/* allocate a new instance */
> +	djpi = kcalloc(1, sizeof(struct djprobe_instance), GFP_ATOMIC);
I think you can use GFP_KERNEL.

> +	if (djpi == NULL) {
> +		goto out;
> +	}
> +
> +	/* allocate stub */
> +	djpi->stub.insn = __get_insn_slot(&djprobe_insn_pages);
> +	if (djpi->stub.insn == NULL) {
> +		__free_djprobe_instance(djpi);
kfree(djpi) should do.

> +		djpi = NULL;
> +		goto out;
> +	}
> +
> +	arch_prepare_djprobe_instance(djpi, param);

[...]
> +
> +	if ((ret = in_kprobes_functions((long)addr)) != 0)
> +		return ret;
> +
> +	mutex_lock(&djprobe_mutex);
> +
> +	/* check confliction with other djprobes */
> +	djpi = __get_djprobe_instance(addr, len);
> +	if (djpi) {
> +		if (djpi->kp.addr == addr) {
> +			djp->inst = djpi;	/* add to another instance */
> +			list_add_rcu(&djp->plist, &djpi->plist);
> +		} else {
> +			ret = -EEXIST;	/* other djprobes were inserted */
> +		}
> +		goto out;
> +	}
> +	/* check confliction with kprobes */
> +	for (i = 1; i < len; i++) {
> +		if (get_kprobe((void *)((long)addr + i))) {
> +			ret = -EEXIST;	/* a kprobes were inserted */
> +			goto out;
> +		}
> +	}
You need similar check in the kprobes registration, as we don;t want
to insert kprobes on the djprobe where you have jump target address.


regards,
Anil Keshavamurthy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,   take 2
  2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
                   ` (5 preceding siblings ...)
  2006-10-19  9:04 ` [RFC][djprobe] djprobe examples Masami Hiramatsu
@ 2006-10-30  6:37 ` bibo,mao
  2006-10-30 14:07   ` Masami Hiramatsu
  6 siblings, 1 reply; 20+ messages in thread
From: bibo,mao @ 2006-10-30  6:37 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP, Satoshi Oshima,
	Hideo Aoki, Yumiko Sugita

This patch will boost kprobe on preemptible kernel, I think
it is deserved to waster some memory for better performance
by deferring memory free after freeze_processes.

thanks
bibo,mao

Masami Hiramatsu wrote:
> Hi,
> 
> Here is the patch which enables kprobe-booster on
> the preemptive kernel.
> 
> When we are unregistering a kprobe-booster, we can't release
> its buffer immediately on the preemptive kernel, because
> some processes might be preempted on the buffer.
> The freeze_processes() and thaw_processes() functions can
> clean those processes up from the buffer. However, the
> processing of those functions takes a long time.
> So, this patch introduces the garbage collection mechanism
> of insn_slot. It also introduces the "dirty" flag to
> free_insn_slot because of efficiency.
> 
> The "clean" instruction slots (dirty flag is cleared) are
> released immediately. But the "dirty" slots which are used
> by boosted kprobes, are marked as garbages.
> collect_garbage_slots() will be invoked to release "dirty"
> slots if 1) there are more than INSNS_PER_PAGE garbage slots
> or 2) there are no unused slots.
> 
> Thanks,
> --
> Masami HIRAMATSU
> 2nd Research Dept.
> Hitachi, Ltd., Systems Development Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
> 
> 
> ---
>  arch/i386/kernel/kprobes.c    |    4 -
>  arch/ia64/kernel/kprobes.c    |    2
>  arch/powerpc/kernel/kprobes.c |    2
>  arch/s390/kernel/kprobes.c    |    2
>  arch/x86_64/kernel/kprobes.c  |    2
>  include/linux/kprobes.h       |    2
>  kernel/kprobes.c              |  101 +++++++++++++++++++++++++++++++++---------
>  7 files changed, 87 insertions(+), 28 deletions(-)
> 
> Index: linux-2.6.19-rc1-mm1/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/kernel/kprobes.c  2006-10-16 10:40:02.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/kernel/kprobes.c       2006-10-16 21:50:44.000000000 +0900
> @@ -38,6 +38,7 @@
>  #include <linux/module.h>
>  #include <linux/moduleloader.h>
>  #include <linux/kallsyms.h>
> +#include <linux/sched.h>
>  #include <asm-generic/sections.h>
>  #include <asm/cacheflush.h>
>  #include <asm/errno.h>
> @@ -83,9 +84,12 @@
>         kprobe_opcode_t *insns;         /* Page of instruction slots */
>         char slot_used[INSNS_PER_PAGE];
>         int nused;
> +       int ngarbage;
>  };
> 
>  static struct hlist_head kprobe_insn_pages;
> +static int kprobe_garbage_slots;
> +static int collect_garbage_slots(void);
> 
>  /**
>   * get_insn_slot() - Find a slot on an executable page for an instruction.
> @@ -96,6 +100,7 @@
>         struct kprobe_insn_page *kip;
>         struct hlist_node *pos;
> 
> +      retry:
>         hlist_for_each(pos, &kprobe_insn_pages) {
>                 kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
>                 if (kip->nused < INSNS_PER_PAGE) {
> @@ -112,7 +117,11 @@
>                 }
>         }
> 
> -       /* All out of space.  Need to allocate a new page. Use slot 0.*/
> +       /* If there are any garbage slots, collect it and try again. */
> +       if (kprobe_garbage_slots && collect_garbage_slots() == 0) {
I am not familiar with freeze_processes/thaw_process, but I only think that
it will bring performance downgrade greatly for the moment.I think kmalloc
method is better than collect_garbage_slots.
> +               goto retry;
> +       }
> +       /* All out of space.  Need to allocate a new page. Use slot 0. */
>         kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
>         if (!kip) {
>                 return NULL;
> @@ -133,10 +142,70 @@
>         memset(kip->slot_used, 0, INSNS_PER_PAGE);
>         kip->slot_used[0] = 1;
>         kip->nused = 1;
> +       kip->ngarbage = 0;
>         return kip->insns;
>  }
> 
> -void __kprobes free_insn_slot(kprobe_opcode_t *slot)
> +/* Return 1 if all garbages are collected, otherwise 0. */
> +static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
> +{
> +       kip->slot_used[idx] = 0;
> +       kip->nused--;
> +       if (kip->nused == 0) {
> +               /*
> +                * Page is no longer in use.  Free it unless
> +                * it's the last one.  We keep the last one
> +                * so as not to have to set it up again the
> +                * next time somebody inserts a probe.
> +                */
> +               hlist_del(&kip->hlist);
> +               if (hlist_empty(&kprobe_insn_pages)) {
> +                       INIT_HLIST_NODE(&kip->hlist);
> +                       hlist_add_head(&kip->hlist,
> +                                      &kprobe_insn_pages);
> +                       return 1;
> +               } else {
> +                       module_free(NULL, kip->insns);
> +                       kfree(kip);
> +               }
> +       }
> +       return 0;
> +}
> +
> +static int __kprobes collect_garbage_slots(void)
> +{
> +       struct kprobe_insn_page *kip;
> +       struct hlist_node *pos, *next;
> +       int ret = -1;
> +
> +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
> +       /* Ensure no-one is preepmted on the garbages */
> +       if (freeze_processes() != 0)
I do not know whether there exists non-freezeable and preemptive 
kernel thread, if there exist then this thread will not be frozen. 
> +               goto thaw_all;
> +#endif
> +       hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
> +               int i;
> +               kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
> +               if (kip->ngarbage == 0)
> +                       continue;
> +               kip->ngarbage = 0;      /* we will collect all garbages */
> +               for (i = 0; i < INSNS_PER_PAGE; i++) {
> +                       if (kip->slot_used[i] == -1 &&
> +                           collect_one_slot(kip, i))
if collect_one_slot executes kfree(kip) and return 0, then kernel will continue
execute the for () loop sentence and access freed kip point by kip->slot_used.
> +                               goto collected;
> +               }
> +       }
> +      collected:
> +       kprobe_garbage_slots = 0;
> +       ret = 0;
> +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
> +      thaw_all:
> +       thaw_processes();
> +#endif
> +       return ret;
> +}
> +
> +void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
>  {
>         struct kprobe_insn_page *kip;
>         struct hlist_node *pos;
> @@ -146,28 +215,18 @@
>                 if (kip->insns <= slot &&
>                     slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
>                         int i = (slot - kip->insns) / MAX_INSN_SIZE;
> -                       kip->slot_used[i] = 0;
> -                       kip->nused--;
> -                       if (kip->nused == 0) {
> -                               /*
> -                                * Page is no longer in use.  Free it unless
> -                                * it's the last one.  We keep the last one
> -                                * so as not to have to set it up again the
> -                                * next time somebody inserts a probe.
> -                                */
> -                               hlist_del(&kip->hlist);
> -                               if (hlist_empty(&kprobe_insn_pages)) {
> -                                       INIT_HLIST_NODE(&kip->hlist);
> -                                       hlist_add_head(&kip->hlist,
> -                                               &kprobe_insn_pages);
> -                               } else {
> -                                       module_free(NULL, kip->insns);
> -                                       kfree(kip);
> -                               }
> +                       if (dirty) {
> +                               kip->slot_used[i] = -1;
> +                               kip->ngarbage++;
it seems that break sentence is missing.
> +                       } else {
> +                               collect_one_slot(kip, i);
> +                               break;
>                         }
> -                       return;
>                 }
>         }
> +       if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) {
> +               collect_garbage_slots();
> +       }
>  }
>  #endif
> 
> Index: linux-2.6.19-rc1-mm1/arch/i386/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/arch/i386/kernel/kprobes.c        2006-10-16 
> 10:40:00.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/arch/i386/kernel/kprobes.c     2006-10-16 
> 21:43:03.000000000 +0900
> @@ -184,7 +184,7 @@
>  void __kprobes arch_remove_kprobe(struct kprobe *p)
>  {
>         mutex_lock(&kprobe_mutex);
> -       free_insn_slot(p->ainsn.insn);
> +       free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1));
>         mutex_unlock(&kprobe_mutex);
>  }
> 
> @@ -333,7 +333,7 @@
>                 return 1;
> 
>  ss_probe:
> -#ifndef CONFIG_PREEMPT
> +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
>         if (p->ainsn.boostable == 1 && !p->post_handler){
>                 /* Boost up -- we can execute copied instructions directly */
>                 reset_current_kprobe();
> Index: linux-2.6.19-rc1-mm1/arch/ia64/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/arch/ia64/kernel/kprobes.c        2006-10-16 
> 10:40:00.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/arch/ia64/kernel/kprobes.c     2006-10-16 
> 10:54:09.000000000 +0900
> @@ -481,7 +481,7 @@
>  void __kprobes arch_remove_kprobe(struct kprobe *p)
>  {
>         mutex_lock(&kprobe_mutex);
> -       free_insn_slot(p->ainsn.insn);
> +       free_insn_slot(p->ainsn.insn, 0);
>         mutex_unlock(&kprobe_mutex);
>  }
>  /*
> Index: linux-2.6.19-rc1-mm1/arch/powerpc/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/arch/powerpc/kernel/kprobes.c     2006-10-16 
> 10:40:00.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/arch/powerpc/kernel/kprobes.c  2006-10-16 
> 10:54:09.000000000 +0900
> @@ -85,7 +85,7 @@
>  void __kprobes arch_remove_kprobe(struct kprobe *p)
>  {
>         mutex_lock(&kprobe_mutex);
> -       free_insn_slot(p->ainsn.insn);
> +       free_insn_slot(p->ainsn.insn, 0);
>         mutex_unlock(&kprobe_mutex);
>  }
> 
> Index: linux-2.6.19-rc1-mm1/arch/s390/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/arch/s390/kernel/kprobes.c        2006-10-16 
> 10:40:00.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/arch/s390/kernel/kprobes.c     2006-10-16 
> 10:54:09.000000000 +0900
> @@ -200,7 +200,7 @@
>  void __kprobes arch_remove_kprobe(struct kprobe *p)
>  {
>         mutex_lock(&kprobe_mutex);
> -       free_insn_slot(p->ainsn.insn);
> +       free_insn_slot(p->ainsn.insn, 0);
>         mutex_unlock(&kprobe_mutex);
>  }
> 
> Index: linux-2.6.19-rc1-mm1/arch/x86_64/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/arch/x86_64/kernel/kprobes.c      2006-10-16 
> 10:40:00.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/arch/x86_64/kernel/kprobes.c   2006-10-16 
> 10:54:09.000000000 +0900
> @@ -224,7 +224,7 @@
>  void __kprobes arch_remove_kprobe(struct kprobe *p)
>  {
>         mutex_lock(&kprobe_mutex);
> -       free_insn_slot(p->ainsn.insn);
> +       free_insn_slot(p->ainsn.insn, 0);
>         mutex_unlock(&kprobe_mutex);
>  }
> 
> Index: linux-2.6.19-rc1-mm1/include/linux/kprobes.h
> ===================================================================
> --- linux-2.6.19-rc1-mm1.orig/include/linux/kprobes.h   2006-10-16 
> 10:40:02.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/include/linux/kprobes.h        2006-10-16 
> 21:43:07.000000000 +0900
> @@ -165,7 +165,7 @@
>  extern int arch_init_kprobes(void);
>  extern void show_registers(struct pt_regs *regs);
>  extern kprobe_opcode_t *get_insn_slot(void);
> -extern void free_insn_slot(kprobe_opcode_t *slot);
> +extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
>  extern void kprobes_inc_nmissed_count(struct kprobe *p);
> 
>  /* Get the kprobe at this addr (if any) - called with preemption disabled */
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5][djprobe] djprobe core patch
  2006-10-27 23:34   ` Keshavamurthy, Anil S
@ 2006-10-30 14:07     ` Masami Hiramatsu
  2006-10-30 14:11       ` Ingo Molnar
  0 siblings, 1 reply; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-30 14:07 UTC (permalink / raw)
  To: Keshavamurthy, Anil S
  Cc: Ananth N Mavinakayanahalli, Prasanna S Panchamukhi, Ingo Molnar,
	SystemTAP, Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi Anil,

Thank you for your advice.

Keshavamurthy@bambi.jf.intel.com wrote:
> Hi Masami,
> 	Sorry for the delayed response. My comments are inline.
> 
> On Thu, Oct 19, 2006 at 06:02:32PM +0900, Masami Hiramatsu wrote:
>> I also updated API reference.
>>
>> API Reference
>> -------------
>> The Djprobe API includes "register_djprobe" function,
>> "unregister_djprobe" function and "commit_djprobes" function.
>> Here are specifications for these functions and the
>> associated probe handlers.
> 
> I am curious as to why you are introducing a new interface
> for registering djprobes. Can't this be done by modifying the
> existing register_kprobe()/unregister_kprobe() and under the
> covers you can choose to implement djprobes. 

There were two reasons.

1) I thought that should be waste of memory. As you can see,
the djprobe structure doesn't have its kprobe nor instruction
buffer, because it is just an interface structure. Instead of
that, it has an pointer to the djprobe_instance structure which
is true instance of the djprobe. This "instance" has the kprobe
structure and instruction buffer. So, I could keep the djprobe
structure small.
2) The other reason is from difference of the usage between djprobe
and kprobe. Kprobe users have to ensure that the target address
points to the 1st byte of instruction. On the other hand, djprobe
users additionally have to count the length of the target
instructions, and ensure those instructions are relocatable.

Therefore, I provided those special interfaces.

If you never mind that the jump-based-kprobe needs two
kprobe structures per one probe point (one is for the interface,
another is for the instance -- for deferred releasing), I can
integrate these interfaces.

> The benifit of hiding djprobes under kprobes would be 
> 
> 1)Users of kernel probe need not have to maintain two
> version of their instrumentation code (one with kprobes 
> and one with djprobes).

Indeed.
(I think users have to prepare additional code which counts
 the length and checks whether the instructions are
 relocatable or not.)

> 2)If for some reason djprobes fails (might be  because there exists
> a kprobes in that djprobe address + size range), then the user of the
> probe interface has to try kprobes in order to succeed the registration.
> However if you choose to implement djprobes under the covers of kprobes, then you can
> transparently support the insertion of the probes, through aggregate kprobes instead
> of failing the djprobes.

I agree. This advantage is important.

> Also with this approach, today's systemtap can easily take advantage of djprobes,
> since djprobes is implemented under the kprobes implementation. Since you are targeting
> your code for mainline, don't worry about kabi of kprobes interface.

Thanks, I'll try it.

> Oaky, some more comments.
> 1) I did not see where you check for whether the 
> target instruction of the djprobe is relocatable.
> Are you currently assuming that the target instruction 
> is simply relocatable?

In this version, that check must be done in the user-space,
because parsing and analyzing CISC instructions are too hard
to be done in the kernel code.
Oshima-san is working for developing this check routine.

> 2) You are not fully populating the pt_regs which the probe handler receives,
> ( you have bogus values in eflags, eip, esp, etc) so I am not sure whether 
> this is a must for instrumentation code who might need these values.

OK, I'll fix that.

> [...]	
>> +int __kprobes djprobe_kprobe(struct kprobe *kp)
> Did not see where you are using this function.

This function is called from kernel/kprobe.c:register_kprobe()

>> +
>> +static __always_inline
>> +    struct djprobe_instance *__create_djprobe_instance(struct arch_djprobe_param
>> +						       *param)
>> +{
>> +	struct djprobe_instance *djpi;
>> +	void * addr = (void *)djprobe_param_address(param);
>> +	/* allocate a new instance */
>> +	djpi = kcalloc(1, sizeof(struct djprobe_instance), GFP_ATOMIC);
> I think you can use GFP_KERNEL.

Indeed.

>> +	if (djpi == NULL) {
>> +		goto out;
>> +	}
>> +
>> +	/* allocate stub */
>> +	djpi->stub.insn = __get_insn_slot(&djprobe_insn_pages);
>> +	if (djpi->stub.insn == NULL) {
>> +		__free_djprobe_instance(djpi);
> kfree(djpi) should do.

Thanks. I'll fix it.

> [...]
>> +
>> +	if ((ret = in_kprobes_functions((long)addr)) != 0)
>> +		return ret;
>> +
>> +	mutex_lock(&djprobe_mutex);
>> +
>> +	/* check confliction with other djprobes */
>> +	djpi = __get_djprobe_instance(addr, len);
>> +	if (djpi) {
>> +		if (djpi->kp.addr == addr) {
>> +			djp->inst = djpi;	/* add to another instance */
>> +			list_add_rcu(&djp->plist, &djpi->plist);
>> +		} else {
>> +			ret = -EEXIST;	/* other djprobes were inserted */
>> +		}
>> +		goto out;
>> +	}
>> +	/* check confliction with kprobes */
>> +	for (i = 1; i < len; i++) {
>> +		if (get_kprobe((void *)((long)addr + i))) {
>> +			ret = -EEXIST;	/* a kprobes were inserted */
>> +			goto out;
>> +		}
>> +	}
> You need similar check in the kprobes registration, as we don;t want
> to insert kprobes on the djprobe where you have jump target address.

I checked it in the kernel/kprobes.c as below:

> --- linux-2.6.19-rc1-mm1.orig/kernel/kprobes.c	2006-10-16 22:04:22.000000000 +0900
> +++ linux-2.6.19-rc1-mm1/kernel/kprobes.c	2006-10-16 22:06:22.000000000 +0900
[...]
> @@ -582,6 +583,16 @@
>  			probed_mod = NULL;
>  	}
> 
> +#ifdef CONFIG_DJPROBE
> +	if (!djprobe_kprobe(p)) {
> +		mutex_lock(&djprobe_mutex);
> +		if (__get_djprobe_instance(p->addr, 1) != NULL) {

This __get_djprobe_instance() returns the djprobe_instance which
partially or fully covers the specified region.

> +			mutex_unlock(&djprobe_mutex);
> +			return -EEXIST;
> +		}
> +	}
> +#endif /* CONFIG_DJPROBE */
> +


Best regards,

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,   take 2
  2006-10-30  6:37 ` [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 bibo,mao
@ 2006-10-30 14:07   ` Masami Hiramatsu
  2006-10-31  9:14     ` bibo,mao
  0 siblings, 1 reply; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-30 14:07 UTC (permalink / raw)
  To: bibo,mao
  Cc: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP, Satoshi Oshima,
	Hideo Aoki, Yumiko Sugita

Hi bibo,

Thank you for your review!

bibo,mao wrote:
> This patch will boost kprobe on preemptible kernel, I think
> it is deserved to waster some memory for better performance
> by deferring memory free after freeze_processes.

I think it doesn't waste memory so much, because it tries
to reuse garbage memories before the kernel allocates an
additional page.

[...]
>> +static int __kprobes collect_garbage_slots(void)
>> +{
>> +       struct kprobe_insn_page *kip;
>> +       struct hlist_node *pos, *next;
>> +       int ret = -1;
>> +
>> +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
>> +       /* Ensure no-one is preepmted on the garbages */
>> +       if (freeze_processes() != 0)
> I do not know whether there exists non-freezeable and preemptive kernel
> thread, if there exist then this thread will not be frozen.

In that case, freeze_processes() returns the positive value which
means how many processes are not frozen. If freeze_processes()
returns non-zero, this function aborts the garbage collection.

>> +               goto thaw_all;
>> +#endif
>> +       hlist_for_each_safe(pos, next, &kprobe_insn_pages) {
>> +               int i;
>> +               kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
>> +               if (kip->ngarbage == 0)
>> +                       continue;
>> +               kip->ngarbage = 0;      /* we will collect all
>> garbages */
>> +               for (i = 0; i < INSNS_PER_PAGE; i++) {
>> +                       if (kip->slot_used[i] == -1 &&
>> +                           collect_one_slot(kip, i))
> if collect_one_slot executes kfree(kip) and return 0, then kernel will
> continue
> execute the for () loop sentence and access freed kip point by
> kip->slot_used.

Exactly, it's a bug.
Thank you. I'll fix that.

>> @@ -146,28 +215,18 @@
>>                 if (kip->insns <= slot &&
>>                     slot < kip->insns + (INSNS_PER_PAGE *
>> MAX_INSN_SIZE)) {
>>                         int i = (slot - kip->insns) / MAX_INSN_SIZE;
>> -                       kip->slot_used[i] = 0;
>> -                       kip->nused--;
>> -                       if (kip->nused == 0) {
>> -                               /*
>> -                                * Page is no longer in use.  Free it
>> unless
>> -                                * it's the last one.  We keep the
>> last one
>> -                                * so as not to have to set it up
>> again the
>> -                                * next time somebody inserts a probe.
>> -                                */
>> -                               hlist_del(&kip->hlist);
>> -                               if (hlist_empty(&kprobe_insn_pages)) {
>> -                                       INIT_HLIST_NODE(&kip->hlist);
>> -                                       hlist_add_head(&kip->hlist,
>> -                                               &kprobe_insn_pages);
>> -                               } else {
>> -                                       module_free(NULL, kip->insns);
>> -                                       kfree(kip);
>> -                               }
>> +                       if (dirty) {
>> +                               kip->slot_used[i] = -1;
>> +                               kip->ngarbage++;
> it seems that break sentence is missing.

Oh, it's my mistake. Thanks.

>> +                       } else {
>> +                               collect_one_slot(kip, i);
>> +                               break;
>>                         }
>> -                       return;

So, I will add a break here.

Best regards,

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/5][djprobe] djprobe core patch
  2006-10-30 14:07     ` Masami Hiramatsu
@ 2006-10-30 14:11       ` Ingo Molnar
  0 siblings, 0 replies; 20+ messages in thread
From: Ingo Molnar @ 2006-10-30 14:11 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, SystemTAP, Satoshi Oshima, Hideo Aoki,
	Yumiko Sugita

On Mon, 2006-10-30 at 23:06 +0900, Masami Hiramatsu wrote:
> 1) I thought that should be waste of memory. As you can see,
> the djprobe structure doesn't have its kprobe nor instruction
> buffer, because it is just an interface structure. Instead of
> that, it has an pointer to the djprobe_instance structure which
> is true instance of the djprobe. This "instance" has the kprobe
> structure and instruction buffer. So, I could keep the djprobe
> structure small.
> 2) The other reason is from difference of the usage between djprobe
> and kprobe. Kprobe users have to ensure that the target address
> points to the 1st byte of instruction. On the other hand, djprobe
> users additionally have to count the length of the target
> instructions, and ensure those instructions are relocatable.
> 
> Therefore, I provided those special interfaces.
> 
> If you never mind that the jump-based-kprobe needs two
> kprobe structures per one probe point (one is for the interface,
> another is for the instance -- for deferred releasing), I can
> integrate these interfaces. 

I dont think memory overhead is a big issue - API compatibility is
important and it will also speed up the adoption of djprobes.

could the size restriction of djprobes be relaxed by it doing a
deassembly of the instruction? (only to figure out the size of the
instruction)

basically, i think the best model would be to make djprobes a
transparent speedup of kprobes.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,    take 2
  2006-10-30 14:07   ` Masami Hiramatsu
@ 2006-10-31  9:14     ` bibo,mao
  2006-10-31 13:47       ` Masami Hiramatsu
  0 siblings, 1 reply; 20+ messages in thread
From: bibo,mao @ 2006-10-31  9:14 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: bibo,mao, Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP, Satoshi Oshima,
	Hideo Aoki, Yumiko Sugita

Masami Hiramatsu wrote:
> Hi bibo,
> 
> Thank you for your review!
> 
> bibo,mao wrote:
>> This patch will boost kprobe on preemptible kernel, I think
>> it is deserved to waster some memory for better performance
>> by deferring memory free after freeze_processes.
> 
> I think it doesn't waste memory so much, because it tries
> to reuse garbage memories before the kernel allocates an
> additional page.
> 
> [...]
>>> +static int __kprobes collect_garbage_slots(void)
>>> +{
>>> +       struct kprobe_insn_page *kip;
>>> +       struct hlist_node *pos, *next;
>>> +       int ret = -1;
>>> +
>>> +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM)
>>> +       /* Ensure no-one is preepmted on the garbages */
>>> +       if (freeze_processes() != 0)
>> I do not know whether there exists non-freezeable and preemptive kernel
>> thread, if there exist then this thread will not be frozen.
> 
> In that case, freeze_processes() returns the positive value which
> means how many processes are not frozen. If freeze_processes()
> returns non-zero, this function aborts the garbage collection.
> 
But from the code, return value of freeze_processes() represents how many
processes can be frozen but are not frozen. I grep the kernel code, there
still exists many processes which flag is PF_NOFREEZE.
I think if current probed thread is PF_NOFREEZE, then kprobe_handler need 
skip the bootser.

thanks
bibo,mao

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,    take 2
  2006-10-31  9:14     ` bibo,mao
@ 2006-10-31 13:47       ` Masami Hiramatsu
  2006-10-31 13:49         ` Ingo Molnar
  2006-10-31 14:13         ` Ingo Molnar
  0 siblings, 2 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-31 13:47 UTC (permalink / raw)
  To: bibo,mao
  Cc: bibo,mao, Keshavamurthy, Anil S, Ananth N Mavinakayanahalli,
	Prasanna S Panchamukhi, Ingo Molnar, SystemTAP, Satoshi Oshima,
	Hideo Aoki, Yumiko Sugita

Hi bibo,

bibo,mao wrote:
>>> I do not know whether there exists non-freezeable and preemptive kernel
>>> thread, if there exist then this thread will not be frozen.
>>
>> In that case, freeze_processes() returns the positive value which
>> means how many processes are not frozen. If freeze_processes()
>> returns non-zero, this function aborts the garbage collection.
>>
> But from the code, return value of freeze_processes() represents how many
> processes can be frozen but are not frozen. I grep the kernel code, there
> still exists many processes which flag is PF_NOFREEZE.
> I think if current probed thread is PF_NOFREEZE, then kprobe_handler
> need skip the bootser.

OK, I see.
It seems problematic because the softirqd is PF_NOFREEZE and it
can execute most of functions...
I think we need to find a new way to solve this problem.

Thank you for your advice.

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible  kernel, take 2
  2006-10-31 13:47       ` Masami Hiramatsu
@ 2006-10-31 13:49         ` Ingo Molnar
  2006-10-31 14:13         ` Ingo Molnar
  1 sibling, 0 replies; 20+ messages in thread
From: Ingo Molnar @ 2006-10-31 13:49 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: bibo,mao, bibo,mao, Keshavamurthy, Anil S,
	Ananth N Mavinakayanahalli, Prasanna S Panchamukhi, SystemTAP,
	Satoshi Oshima, Hideo Aoki, Yumiko Sugita

On Tue, 2006-10-31 at 22:17 +0900, Masami Hiramatsu wrote:
> OK, I see.
> It seems problematic because the softirqd is PF_NOFREEZE and it
> can execute most of functions...
> I think we need to find a new way to solve this problem.

could you outline the problem to me? freeze_processes() should be a
generic facility to move all kernel processing into a 'known' context of
execution. All the PF_NOFREEZE kernel threads are supposed to do
periodic calls to try_to_freeze(). They should not (and most of the time
they do not) prevent freezing of all state on the system.

am i misunderstanding the problem?

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible  kernel, take 2
  2006-10-31 13:47       ` Masami Hiramatsu
  2006-10-31 13:49         ` Ingo Molnar
@ 2006-10-31 14:13         ` Ingo Molnar
  2006-10-31 16:39           ` Masami Hiramatsu
  1 sibling, 1 reply; 20+ messages in thread
From: Ingo Molnar @ 2006-10-31 14:13 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: bibo,mao, bibo,mao, Keshavamurthy, Anil S,
	Ananth N Mavinakayanahalli, Prasanna S Panchamukhi, SystemTAP,
	Satoshi Oshima, Hideo Aoki, Yumiko Sugita


another thought:

would it help if we extended the 'soft lockup watchdog' with periodic
'try whether the whole system can be frozen' checks? That would make
testing coverage alot wider, and it would also give kernel developers an
easy way to make sure the kernel is still freezable. (instead of having
to try something like sw-suspend explicitly)

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,  take 2
  2006-10-31 14:13         ` Ingo Molnar
@ 2006-10-31 16:39           ` Masami Hiramatsu
  0 siblings, 0 replies; 20+ messages in thread
From: Masami Hiramatsu @ 2006-10-31 16:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: bibo,mao, bibo,mao, Keshavamurthy, Anil S,
	Ananth N Mavinakayanahalli, Prasanna S Panchamukhi, SystemTAP,
	Satoshi Oshima, Hideo Aoki, Yumiko Sugita

Hi Ingo,

Ingo Molnar wrote:
> another thought:
> 
> would it help if we extended the 'soft lockup watchdog' with periodic
> 'try whether the whole system can be frozen' checks? That would make
> testing coverage alot wider, and it would also give kernel developers an
> easy way to make sure the kernel is still freezable. (instead of having
> to try something like sw-suspend explicitly)

It seems useful to me, I'll check that.
Thank you!

-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,   take 2
  2006-11-02 18:51 ` Masami Hiramatsu
@ 2006-11-03  9:16   ` bibo,mao
  0 siblings, 0 replies; 20+ messages in thread
From: bibo,mao @ 2006-11-03  9:16 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: bibo mao, mingo, Keshavamurthy, Anil S, ananth, prasanna,
	systemtap, soshima, haoki, yumiko.sugita.yf

Masami Hiramatsu wrote:
> Hi Bibo,
> 
> bibo mao wrote:
>  > I am not familiar with freeze_processes(), I only view code.
>  > And I write simple program(though buggy) to test:
> [...]
>  > it seems that many threads are not frozen even freeze_processes
>  > return 0.
> 
> I think the most important issue is whether those threads are
> preempted (suddenly scheduled out) or not.
> Those preempted threads might be preempted on the instruction
> buffer or on the middle of the target instructions. In that
> case, we cannot free the buffer or overwrite a jump code.
> But, if those threads are sleeping at the known places, we can
> ensure safety of freeing/overwriting.
> Therefore, I think it is necessary to check whether all
> non-frozen threads are sleeping or not, like as below;
I think that will be ok, it will be safe to free jump buffer 
at that time.

thanks
bibo,mao
 
> ---------------------------------------
> int check_safety(void)
> {
>  int ret = 0;
>  struct task_struct *g, *p;
>  if (freeze_processes()) {
>   goto Thaw;
>  }
>  do_each_thread(g, p) {
>   if (frozen(p))
>    continue;
>   if (p->state == TASK_RUNNING) {
>    ret = -EAGAIN;
>    break;
>   }
>  } while_each_thread(g, p);
> Thaw:
>  thaw_processes();
>  return ret;
> }
> 
> 
> Thanks,
> 
> 
> --
> Masami HIRAMATSU
> Linux Technology Center
> Hitachi, Ltd., Systems Development Laboratory
> E-mail: masami.hiramatsu.pt@hitachi.com
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel,  take 2
  2006-11-01 17:01 bibo mao
@ 2006-11-02 18:51 ` Masami Hiramatsu
  2006-11-03  9:16   ` bibo,mao
  0 siblings, 1 reply; 20+ messages in thread
From: Masami Hiramatsu @ 2006-11-02 18:51 UTC (permalink / raw)
  To: bibo mao
  Cc: mingo, anil.s.keshavamurthy, ananth, prasanna, systemtap,
	soshima, haoki, yumiko.sugita.yf

Hi Bibo,

bibo mao wrote:
> I am not familiar with freeze_processes(), I only view code.
> And I write simple program(though buggy) to test:
[...]
> it seems that many threads are not frozen even freeze_processes
> return 0.

I think the most important issue is whether those threads are
preempted (suddenly scheduled out) or not.
Those preempted threads might be preempted on the instruction
buffer or on the middle of the target instructions. In that
case, we cannot free the buffer or overwrite a jump code.
But, if those threads are sleeping at the known places, we can
ensure safety of freeing/overwriting.
Therefore, I think it is necessary to check whether all
non-frozen threads are sleeping or not, like as below;

---------------------------------------
int check_safety(void)
{
 int ret = 0;
 struct task_struct *g, *p;
 if (freeze_processes()) {
  goto Thaw;
 }
 do_each_thread(g, p) {
  if (frozen(p))
   continue;
  if (p->state == TASK_RUNNING) {
   ret = -EAGAIN;
   break;
  }
 } while_each_thread(g, p);
Thaw:
 thaw_processes();
 return ret;
}


Thanks,


-- 
Masami HIRAMATSU
Linux Technology Center
Hitachi, Ltd., Systems Development Laboratory
E-mail: masami.hiramatsu.pt@hitachi.com


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2
@ 2006-11-01 17:01 bibo mao
  2006-11-02 18:51 ` Masami Hiramatsu
  0 siblings, 1 reply; 20+ messages in thread
From: bibo mao @ 2006-11-01 17:01 UTC (permalink / raw)
  To: mingo
  Cc: anil.s.keshavamurthy, ananth, prasanna, systemtap, soshima,
	haoki, yumiko.sugita.yf

I am not familiar with freeze_processes(), I only view code.
And I write simple program(though buggy) to test:
---------------------------------------
 struct task_struct *g, *p;

 if (freeze_processes()) {
  goto Thaw;
 }
 do_each_thread(g, p) {
  if (frozen(p))
   continue;
  printk("%s not stopped\n", p->comm );
 } while_each_thread(g, p);
Thaw:
 thaw_processes();
------------------------------------
the output is this(except for current thread):
ksoftirqd/0 not stopped
watchdog/0 not stopped
events/0 not stopped
khelper not stopped
kthread not stopped
kblockd/0 not stopped
kacpid not stopped
aio/0 not stopped
xfslogd/0 not stopped
xfsdatad/0 not stopped
kpsmoused not stopped
ipw2100/0 not stopped

it seems that many threads are not frozen even freeze_processes
return 0.

thanks
bibo,mao

"Ingo Molnar" <mingo@redhat.com> wrote:
> On Tue, 2006-10-31 at 22:17 +0900, Masami Hiramatsu wrote:
>> OK, I see.
>> It seems problematic because the softirqd is PF_NOFREEZE and it
>> can execute most of functions...
>> I think we need to find a new way to solve this problem.
>
> could you outline the problem to me? freeze_processes() should be a
> generic facility to move all kernel processing into a 'known' context of
> execution. All the PF_NOFREEZE kernel threads are supposed to do
> periodic calls to try_to_freeze(). They should not (and most of the time
> they do not) prevent freezing of all state on the system.
>
> am i misunderstanding the problem?
>
> Ingo
>
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2006-11-03  2:22 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-10-16 13:14 [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 Masami Hiramatsu
2006-10-19  9:00 ` [PATCH 1/5][djprobe] generalize the length of the instruction slots Masami Hiramatsu
2006-10-19  9:03 ` [PATCH 3/5][djprobe] export set_jmp_op() for sharing Masami Hiramatsu
2006-10-19  9:03 ` [PATCH 4/5][djprobe] djprobe for i386 architecture code Masami Hiramatsu
2006-10-19  9:03 ` [PATCH 2/5][djprobe] djprobe core patch Masami Hiramatsu
2006-10-27 23:34   ` Keshavamurthy, Anil S
2006-10-30 14:07     ` Masami Hiramatsu
2006-10-30 14:11       ` Ingo Molnar
2006-10-19  9:04 ` [PATCH 5/5][djprobe] delayed invoking commit_djprobes() Masami Hiramatsu
2006-10-19  9:04 ` [RFC][djprobe] djprobe examples Masami Hiramatsu
2006-10-30  6:37 ` [RFC][PATCH][kprobe] enabling booster on the preemptible kernel, take 2 bibo,mao
2006-10-30 14:07   ` Masami Hiramatsu
2006-10-31  9:14     ` bibo,mao
2006-10-31 13:47       ` Masami Hiramatsu
2006-10-31 13:49         ` Ingo Molnar
2006-10-31 14:13         ` Ingo Molnar
2006-10-31 16:39           ` Masami Hiramatsu
2006-11-01 17:01 bibo mao
2006-11-02 18:51 ` Masami Hiramatsu
2006-11-03  9:16   ` bibo,mao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).