Hi Anil and Ananth, I ported the kprobe-booster to the IA64 architecture. This patch can be applied against 2.6.17-rc5-mm3. And here is the patch. Could you review it? This patch modifies kprobes as below: - Boost if the target bundle doesn't use the B or X unit. - Introduce the INST_FLAG_BOOSTABLE value for ainsn.insn_flag. If this flag is set, the kprobe is boostable. - Change the instruction buffer (ainsn.insn) to an array of bundles which has three elements. The 2nd element and the 3rd element of this array are used for dynamic execution. And this patch is Lindent clean ;) I measured the overhead of the booster by using the benchmark kernel module attached to this mail. noprobe: 436 machine cycles noboost: 1162 machine cycles boosted: 829 machine cycles CPU spec: Itanium2 1.3GHz (2CPUs) -- Masami HIRAMATSU 2nd Research Dept. Hitachi, Ltd., Systems Development Laboratory E-mail: hiramatu@sdl.hitachi.co.jp arch/ia64/kernel/kprobes.c | 84 +++++++++++++++++++++++++++++++++++++++++---- include/asm-ia64/kprobes.h | 9 +++- 2 files changed, 85 insertions(+), 8 deletions(-) diff --exclude=CVS -Narup a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c --- a/arch/ia64/kernel/kprobes.c 2006-06-05 13:02:54.000000000 +0900 +++ b/arch/ia64/kernel/kprobes.c 2006-06-05 13:34:07.000000000 +0900 @@ -78,6 +78,35 @@ static enum instruction_type bundle_enco }; /* + * In this function, we check whether the target bundle is possible + * to modify IP. 
+ */ +static __always_inline int can_boost(uint template) +{ + template &= 0x1e; + if (template >= 0x10 || /* including B unit */ + template == 0x04 || /* including X unit */ + template == 0x06) { /* undefined */ + return 0; + } + return 1; +} + +/* Insert a long branch code */ +static __always_inline void set_brl_inst(void *from, void *to) +{ + s64 rel = ((s64) to - (s64) from) >> 4; + bundle_t *brl; + brl = (bundle_t *) ((u64) from & ~0xf); + brl->quad0.template = 0x05; /* [MLX](stop) */ + brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ + brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; + brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); + /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ + brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); +} + +/* * In this function we check to see if the instruction * is IP relative instruction and update the kprobe * inst flag accordingly @@ -125,6 +154,10 @@ static void __kprobes update_kprobe_inst break; } } + + if (can_boost(template)) { + p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; + } return; } @@ -218,7 +251,7 @@ static void __kprobes prepare_break_inst struct kprobe *p) { unsigned long break_inst = BREAK_INST; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle = &p->ainsn.insn[0].bundle; /* * Copy the original kprobe_inst qualifying predicate(qp) @@ -249,6 +282,14 @@ static void __kprobes prepare_break_inst * single step on original instruction */ update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); + + /* If the bundle can be boosted, prepare boost bundles */ + if (p->ainsn.inst_flag & INST_FLAG_BOOSTABLE) { + memcpy(&p->ainsn.insn[1].bundle, &p->opcode.bundle, + sizeof(bundle_t)); + set_brl_inst(&p->ainsn.insn[2].bundle, + (bundle_t *) p->addr + 1); + } } static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot, @@ -424,10 +465,10 @@ int __kprobes arch_prepare_kprobe(struct unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); unsigned long 
kprobe_inst=0; unsigned int slot = addr & 0xf, template, major_opcode = 0; - bundle_t *bundle = &p->ainsn.insn.bundle; + bundle_t *bundle = &p->ainsn.insn[0].bundle; memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); - memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + memcpy(&p->ainsn.insn[0].bundle, kprobe_addr, sizeof(bundle_t)); template = bundle->quad0.template; @@ -454,7 +495,7 @@ void __kprobes arch_arm_kprobe(struct kp unsigned long addr = (unsigned long)p->addr; unsigned long arm_addr = addr & ~0xFULL; - memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); + memcpy((char *)arm_addr, &p->ainsn.insn[0].bundle, sizeof(bundle_t)); flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); } @@ -471,7 +512,7 @@ void __kprobes arch_disarm_kprobe(struct /* * We are resuming execution after a single step fault, so the pt_regs * structure reflects the register state after we executed the instruction - * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * located in the kprobe (p->ainsn.insn[0].bundle). We still need to adjust * the ip to point back to the original stack address. To set the IP address * to original stack address, handle the case where we need to fixup the * relative IP address and/or fixup branch register. 
@@ -488,7 +529,7 @@ static void __kprobes resume_execution(s if (slot == 1 && bundle_encoding[template][1] == L) slot = 2; - if (p->ainsn.inst_flag) { + if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { /* Fix relative IP address */ @@ -563,6 +604,24 @@ static void __kprobes prepare_ss(struct ia64_psr(regs)->ss = 1; } +/* prepare to execute directly */ +static void __kprobes prepare_boost(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long slot = (unsigned long)p->addr & 0xf; + + regs->cr_iip = (unsigned long)&p->ainsn.insn[1].bundle & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn off single stepping */ + ia64_psr(regs)->ss = 0; + + reset_current_kprobe(); +} + static int __kprobes is_ia64_break_inst(struct pt_regs *regs) { unsigned int slot = ia64_psr(regs)->ri; @@ -602,6 +661,11 @@ static int __kprobes pre_kprobes_handler struct pt_regs *regs = args->regs; kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); struct kprobe_ctlblk *kcb; +#ifdef CONFIG_PREEMPT + unsigned pre_preempt_count = preempt_count(); +#else + unsigned pre_preempt_count = 1; +#endif /* * We don't want to be preempted for the entire @@ -681,6 +745,14 @@ static int __kprobes pre_kprobes_handler */ return 1; + if (pre_preempt_count && p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && + !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + prepare_boost(p, regs); + preempt_enable_no_resched(); + return 1; + } + ss_probe: prepare_ss(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; diff --exclude=CVS -Narup a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h --- a/include/asm-ia64/kprobes.h 2006-06-05 13:03:07.000000000 +0900 +++ b/include/asm-ia64/kprobes.h 2006-06-05 13:34:07.000000000 +0900 @@ -29,8 +29,12 @@ #include #include -#define MAX_INSN_SIZE 16 +#define MAX_INSN_SIZE 3 /* 3 bundles */ #define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) 
+#define NOP_M_INST (long)(1<<27) +#define BRL_INST(i1,i2) (long)((0xcL << 37) | /* brl */ \ + (1L << 12) | /* many */ \ + (((i1) & 1) << 36) | ((i2) << 13)) /* imm */ typedef union cmp_inst { struct { @@ -108,10 +112,11 @@ struct fnptr { /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the instruction to be emulated */ - kprobe_opcode_t insn; + kprobe_opcode_t insn[3]; #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 #define INST_FLAG_FIX_BRANCH_REG 2 #define INST_FLAG_BREAK_INST 4 + #define INST_FLAG_BOOSTABLE 8 unsigned long inst_flag; unsigned short target_br_reg; };