From: Srikar Dronamraju
To: Frank Ch Eigler
Cc: systemtap@sourceware.org
Subject: Re: [Bug uprobes/13539] occasional oops, kernel SEGV, RHEL5, :uprobes:uprobe_free_process+0xba/0x131
Date: Fri, 06 Jan 2012 12:25:00 -0000
Message-ID: <20120106122516.GA17178@linux.vnet.ibm.com>

While I still cannot see how uprobe_{free,put}_process could race with
uprobe_report_{exit,exec}, I certainly think some cleanup can be done.
However, I don't think we need to do a utask or uproc lookup from the
table, especially in the case of callbacks. This is mostly similar to
what Jim proposed. I haven't tested this patch myself, and I couldn't
reproduce the problem.
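Before the patch itself, here is a minimal, self-contained sketch of the two
patterns it relies on: a "try-get" built on atomic_inc_not_zero() that refuses
to take a reference on an object whose refcount has already reached zero, and
a report callback that simply detaches when that try-get fails. This is
illustrative only -- the demo_* names below are stand-ins, not the real
uprobes structures or functions.

#include <linux/atomic.h>
#include <linux/errno.h>
#include <linux/slab.h>

/* Illustrative stand-in for struct uprobe_process. */
struct demo_proc {
	atomic_t refcount;
	/* ... per-process probe state ... */
};

/*
 * Try-get: unlike a plain atomic_inc(), atomic_inc_not_zero() will not
 * resurrect an object that a concurrent put has already committed to
 * freeing.  Returns NULL when the object is no longer live.
 */
static struct demo_proc *demo_get(struct demo_proc *p)
{
	if (p && atomic_inc_not_zero(&p->refcount))
		return p;
	return NULL;
}

static void demo_put(struct demo_proc *p)
{
	if (atomic_dec_and_test(&p->refcount))
		kfree(p);	/* last reference gone: free it */
}

/*
 * Callback pattern: take a reference up front; if that fails, the
 * per-process state is already being torn down, so bail out (the real
 * callbacks return a detach action) instead of touching freed memory.
 */
static int demo_report_event(struct demo_proc *p)
{
	p = demo_get(p);
	if (!p)
		return -ESRCH;	/* nothing to do; caller detaches */

	/* ... handle the event while holding a valid reference ... */

	demo_put(p);
	return 0;
}

The patch below applies the same idea by making uprobe_get_process() return
NULL once the refcount has dropped to zero and by having the uprobe_report_*
callbacks detach in that case.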
---
 runtime/uprobes/uprobes.c  | 106 ++++++++++++++++++++++++++++++++-----------
 runtime/uprobes2/uprobes.c | 105 ++++++++++++++++++++++++++++---------------
 2 files changed, 148 insertions(+), 63 deletions(-)

diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c
index 47fb1c9..ed1e94b 100644
--- a/runtime/uprobes/uprobes.c
+++ b/runtime/uprobes/uprobes.c
@@ -114,33 +114,39 @@ struct delayed_signal {
 	siginfo_t info;
 };
 
-static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct uprobe_task *utask;
-	unsigned long flags;
 
 	head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)];
-	lock_utask_table(flags);
 	hlist_for_each_entry(utask, node, head, hlist) {
-		if (utask->tsk == tsk) {
-			unlock_utask_table(flags);
+		if (utask->tsk == tsk)
 			return utask;
-		}
 	}
-	unlock_utask_table(flags);
 	return NULL;
 }
 
+static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+{
+	struct uprobe_task *utask;
+	unsigned long flags;
+
+	lock_utask_table(flags);
+	utask = uprobe_find_utask_locked(tsk);
+	unlock_utask_table(flags);
+	return utask;
+}
+
 static void uprobe_hash_utask(struct uprobe_task *utask)
 {
 	struct hlist_head *head;
 	unsigned long flags;
 
 	INIT_HLIST_NODE(&utask->hlist);
-	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	lock_utask_table(flags);
+	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	hlist_add_head(&utask->hlist, head);
 	unlock_utask_table(flags);
 }
@@ -154,9 +160,11 @@ static void uprobe_unhash_utask(struct uprobe_task *utask)
 	unlock_utask_table(flags);
 }
 
-static inline void uprobe_get_process(struct uprobe_process *uproc)
+static struct uprobe_process *uprobe_get_process(struct uprobe_process *uproc)
 {
-	atomic_inc(&uproc->refcount);
+	if (atomic_inc_not_zero(&uproc->refcount))
+		return uproc;
+	return NULL;
 }
 
 /*
@@ -186,8 +194,9 @@ static struct uprobe_process *uprobe_find_process(pid_t tgid)
 	head = &uproc_table[hash_long(tgid, UPROBE_HASH_BITS)];
 	hlist_for_each_entry(uproc, node, head, hlist) {
 		if (uproc->tgid == tgid && !uproc->finished) {
-			uprobe_get_process(uproc);
-			down_write(&uproc->rwsem);
+			uproc = uprobe_get_process(uproc);
+			if (uproc)
+				down_write(&uproc->rwsem);
 			return uproc;
 		}
 	}
@@ -476,6 +485,12 @@ static void uprobe_free_task(struct uprobe_task *utask)
 	struct deferred_registration *dr, *d;
 	struct delayed_signal *ds, *ds2;
 
+	printk(KERN_INFO "uprobe_free_task %p (tid %ld), caller %pS, ctid %ld\n", utask, utask->tsk->pid, _RET_IP_, current->pid);
+
+	/*
+	 * Do this first, since a utask that's still in the utask_table
+	 * is assumed (e.g., by uprobe_report_exit) to be valid.
+	 */
 	uprobe_unhash_utask(utask);
 	list_del(&utask->list);
 	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
@@ -487,7 +502,6 @@ static void uprobe_free_task(struct uprobe_task *utask)
 		list_del(&ds->list);
 		kfree(ds);
 	}
-
 	utask_free_uretprobe_instances(utask);
 	kfree(utask);
 }
@@ -499,12 +513,13 @@ static void uprobe_free_process(struct uprobe_process *uproc)
 	struct uprobe_task *utask, *tmp;
 	struct uprobe_ssol_area *area = &uproc->ssol_area;
 
+	printk(KERN_INFO "uprobe_free_process %p (pid %ld), caller %pS, ctid %ld\n", uproc, uproc->tgid, _RET_IP_, current->pid);
+
 	if (!uproc->finished)
 		uprobe_release_ssol_vma(uproc);
 	if (area->slots)
 		kfree(area->slots);
-	if (!hlist_unhashed(&uproc->hlist))
-		hlist_del(&uproc->hlist);
+	hlist_del(&uproc->hlist);
 	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list) {
 		/*
 		 * utrace_detach() is OK here (required, it seems) even if
@@ -515,6 +530,7 @@ static void uprobe_free_process(struct uprobe_process *uproc)
 		uprobe_free_task(utask);
 	}
 	up_write(&uproc->rwsem);	// So kfree doesn't complain
+	printk(KERN_INFO "uprobe_free_process zap %p\n", uproc);
 	kfree(uproc);
 }
 
@@ -539,7 +555,9 @@ static int uprobe_put_process(struct uprobe_process *uproc)
 		/*
 		 * The works because uproc_mutex is held any
 		 * time the ref count can go from 0 to 1 -- e.g.,
-		 * register_uprobe() sneaks in with a new probe.
+		 * register_uprobe() snuck in with a new probe,
+		 * or a callback such as uprobe_report_exit()
+		 * just started.
 		 */
 		up_write(&uproc->rwsem);
 	} else {
@@ -852,7 +870,7 @@ static int uprobe_validate_vma(struct task_struct *t, unsigned long vaddr)
 	mmput(mm);
 	return ret;
 }
-	
+
 /* Probed address must be in an executable VM area, outside the SSOL area. */
 static int uprobe_validate_vaddr(struct task_struct *p, unsigned long vaddr,
 	struct uprobe_process *uproc)
@@ -1961,9 +1979,15 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 	struct uprobe_task *utask;
 	struct uprobe_process *uproc;
 
+	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
 	BUG_ON(!utask);
-	uproc = utask->uproc;
+	uproc = uprobe_get_process(utask->uproc);
+	rcu_read_unlock();
+
+	if (!uproc)
+		return UTRACE_ACTION_DETACH|UTRACE_ACTION_RESUME;
+
 	if (current == utask->quiesce_master) {
 		/*
 		 * tsk was already quiescent when quiesce_all_threads()
@@ -1977,8 +2001,11 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 	}
 
 	BUG_ON(utask->active_probe);
+
 	down_write(&uproc->rwsem);
+	printk(KERN_INFO "uprobe_report_quiesce2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	/*
 	 * When a thread hits a breakpoint or single-steps, utrace calls
 	 * this quiesce callback before our signal callback. We must
@@ -1998,6 +2025,8 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 		check_uproc_quiesced(uproc, tsk);
 done:
 	up_write(&uproc->rwsem);
+	uprobe_put_process(uproc);
+	printk(KERN_INFO "uprobe_report_quiesce3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 	return UTRACE_ACTION_RESUME;
 }
 
@@ -2074,8 +2103,14 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 	int utask_quiescing;
 
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
-	uprobe_get_process(uproc);
+	if (utask)
+		uproc = uprobe_get_process(utask->uproc);
+
+	if (!utask || !uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_ACTION_DETACH;
+
+	printk(KERN_INFO "uprobe_report_exit %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	ppt = utask->active_probe;
 	if (ppt) {
@@ -2108,6 +2143,9 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 	}
 
 	down_write(&uproc->rwsem);
+
+	printk(KERN_INFO "uprobe_report_exit2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	utask_quiescing = utask->quiescing;
 	uprobe_free_task(utask);
@@ -2129,6 +2167,8 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 			uprobe_cleanup_process(uproc);
 	}
 	up_write(&uproc->rwsem);
+	printk(KERN_INFO "uprobe_report_exit3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	uprobe_put_process(uproc);
 
 	return UTRACE_ACTION_DETACH;
@@ -2241,7 +2281,7 @@ static int uprobe_fork_uproc(struct uprobe_process *parent_uproc,
 	child_utask = uprobe_find_utask(child_tsk);
 	BUG_ON(!child_utask);
 	ret = uprobe_fork_uretprobe_instances(parent_utask, child_utask);
-	
+
 	hlist_add_head(&child_uproc->hlist,
 		&uproc_table[hash_long(child_uproc->tgid, UPROBE_HASH_BITS)]);
@@ -2271,6 +2311,7 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 	ptask = (struct uprobe_task *)rcu_dereference(engine->data);
 	uproc = ptask->uproc;
 
+	printk(KERN_INFO "uprobe_report_clone %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 	/*
 	 * Lock uproc so no new uprobes can be installed 'til all
 	 * report_clone activities are completed. Lock uproc_table
@@ -2280,6 +2321,8 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 	down_write(&uproc->rwsem);
 	get_task_struct(child);
 
+	printk(KERN_INFO "uprobe_report_clone2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	if (clone_flags & (CLONE_THREAD|CLONE_VM)) {
 		/* New thread in the same process (CLONE_THREAD) or
 		 * processes sharing the same memory space (CLONE_VM). */
@@ -2323,7 +2366,7 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 			}
 		}
 	}
-	
+
 	if (!hlist_empty(&ptask->uretprobe_instances))
 		(void) uprobe_fork_uproc(uproc, ptask, child);
 	}
@@ -2360,8 +2403,14 @@ static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
 	u32 ret = UTRACE_ACTION_RESUME;
 
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
-	uprobe_get_process(uproc);
+	if (utask)
+		uproc = uprobe_get_process(utask->uproc);
+
+	if (!utask || !uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_ACTION_DETACH;
+
+	printk(KERN_INFO "uprobe_report_exec %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	/*
 	 * Only cleanup if we're the last thread. If we aren't,
@@ -2390,11 +2439,14 @@ static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
 			ret = UTRACE_ACTION_DETACH;
 	}
 	up_write(&uproc->rwsem);
+	printk(KERN_INFO "uprobe_report_exec2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	/* If any [un]register_uprobe is pending, it'll clean up. */
 	if (uprobe_put_process(uproc))
 		ret = UTRACE_ACTION_DETACH;
 
+	printk(KERN_INFO "uprobe_report_exec4 %p %ld=%ld ret=%lu\n", uproc, uproc->tgid, current->pid, (unsigned long)ret);
+
 	return ret;
 }
 
@@ -2631,7 +2683,7 @@ static inline unsigned long lookup_uretprobe(struct hlist_node *r,
 {
 	struct uretprobe_instance *ret_inst;
 	unsigned long trampoline_addr;
-	
+
 	if (IS_ERR(uproc->uretprobe_trampoline_addr))
 		return pc;
 	trampoline_addr = (unsigned long)uproc->uretprobe_trampoline_addr;
@@ -2672,7 +2724,7 @@ unsigned long uprobe_get_pc(struct uretprobe_instance *ri, unsigned long pc,
 		if (!uk)
 			return 0;
 		uproc = uk->ppt->uproc;
-		r = &ri->hlist;	
+		r = &ri->hlist;
 	}
 	return lookup_uretprobe(r, uproc, pc, sp);
 }
@@ -2685,7 +2737,7 @@ unsigned long uprobe_get_pc_task(struct task_struct *task, unsigned long pc,
 	struct uprobe_task *utask;
 	struct uprobe_process *uproc;
 	unsigned long result;
-	
+
 	utask = uprobe_find_utask(task);
 	if (!utask) {
 		return pc;
diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c
index b3c6e57..953ddf3 100644
--- a/runtime/uprobes2/uprobes.c
+++ b/runtime/uprobes2/uprobes.c
@@ -114,33 +114,39 @@ struct delayed_signal {
 	siginfo_t info;
 };
 
-static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct uprobe_task *utask;
-	unsigned long flags;
 
 	head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)];
-	lock_utask_table(flags);
 	hlist_for_each_entry(utask, node, head, hlist) {
-		if (utask->tsk == tsk) {
-			unlock_utask_table(flags);
+		if (utask->tsk == tsk)
 			return utask;
-		}
 	}
-	unlock_utask_table(flags);
 	return NULL;
 }
 
+static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+{
+	struct uprobe_task *utask;
+	unsigned long flags;
+
+	lock_utask_table(flags);
+	utask = uprobe_find_utask_locked(tsk);
+	unlock_utask_table(flags);
+	return utask;
+}
+
 static void uprobe_hash_utask(struct uprobe_task *utask)
 {
 	struct hlist_head *head;
 	unsigned long flags;
 
 	INIT_HLIST_NODE(&utask->hlist);
-	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	lock_utask_table(flags);
+	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	hlist_add_head(&utask->hlist, head);
 	unlock_utask_table(flags);
 }
@@ -154,9 +160,11 @@ static void uprobe_unhash_utask(struct uprobe_task *utask)
 	unlock_utask_table(flags);
 }
 
-static inline void uprobe_get_process(struct uprobe_process *uproc)
+static inline struct uprobe_process *uprobe_get_process(struct uprobe_process *uproc)
 {
-	atomic_inc(&uproc->refcount);
+	if (atomic_inc_not_zero(&uproc->refcount))
+		return uproc;
+	return NULL;
 }
 
 /*
@@ -186,8 +194,9 @@ static struct uprobe_process *uprobe_find_process(struct pid *tg_leader)
 	head = &uproc_table[hash_ptr(tg_leader, UPROBE_HASH_BITS)];
 	hlist_for_each_entry(uproc, node, head, hlist) {
 		if (uproc->tg_leader == tg_leader && !uproc->finished) {
-			uprobe_get_process(uproc);
-			down_write(&uproc->rwsem);
+			uproc = uprobe_get_process(uproc);
+			if (uproc)
+				down_write(&uproc->rwsem);
 			return uproc;
 		}
 	}
@@ -547,24 +556,32 @@ static void uprobe_free_task(struct uprobe_task *utask, bool in_callback)
 	struct deferred_registration *dr, *d;
 	struct delayed_signal *ds, *ds2;
 
-	if (utask->engine && (utask->tsk != current || !in_callback)) {
-		int result;
+	/*
+	 * Do this first, since a utask that's still in the utask_table
+	 * is assumed (e.g., by uprobe_report_exit) to be valid.
+	 */
+	uprobe_unhash_utask(utask);
+	if (utask->engine && (utask->tsk != current || !in_callback)) {
 		/*
-		 * No other tasks in this process should be running
-		 * uprobe_report_* callbacks. (If they are, utrace_barrier()
-		 * here could deadlock.)
+		 * If we're racing with (say) uprobe_report_exit() here,
+		 * utrace_control_pid() may fail with -EINPROGRESS. That's
+		 * OK. The callback will abort with UTRACE_DETACH after
+		 * we're done. It is NOT OK to call utrace_barrier() here,
+		 * since the callback would probably deadlock awaiting
		 * uproc->rwsem.
+		 *
 		 * utrace_control_pid calls task_pid() so we should hold the
 		 * rcu_read_lock.
 		 */
 		rcu_read_lock();
-		result = utrace_control_pid(utask->pid, utask->engine,
-					UTRACE_DETACH);
+		if (utrace_control_pid(utask->pid, utask->engine,
+					UTRACE_DETACH) != 0)
+			/* Ignore it. */
+			;
 		rcu_read_unlock();
-		BUG_ON(result == -EINPROGRESS);
 	}
 	put_pid(utask->pid);	/* null pid OK */
 
-	uprobe_unhash_utask(utask);
 	list_del(&utask->list);
 	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
 		list_del(&dr->list);
@@ -594,8 +611,7 @@ static void uprobe_free_process(struct uprobe_process *uproc, int in_callback)
 	if (area->slots)
 		kfree(area->slots);
-	if (!hlist_unhashed(&uproc->hlist))
-		hlist_del(&uproc->hlist);
+	hlist_del(&uproc->hlist);
 	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list)
 		uprobe_free_task(utask, in_callback);
 	put_pid(uproc->tg_leader);
@@ -622,9 +638,9 @@ static int uprobe_put_process(struct uprobe_process *uproc, bool in_callback)
 		down_write(&uproc->rwsem);
 		if (unlikely(atomic_read(&uproc->refcount) != 0)) {
 			/*
-			 * The works because uproc_mutex is held any
-			 * time the ref count can go from 0 to 1 -- e.g.,
-			 * register_uprobe() sneaks in with a new probe.
+			 * register_uprobe() snuck in with a new probe,
+			 * or a callback such as uprobe_report_exit()
+			 * just started.
 			 */
 			up_write(&uproc->rwsem);
 		} else {
@@ -1771,9 +1787,10 @@ static int utask_fake_quiesce(struct uprobe_task *utask)
 	} else {
 		utask->state = UPTASK_SLEEPING;
 		uproc->n_quiescent_threads++;
-		up_write(&uproc->rwsem);
+
 		/* We ref-count sleepers. */
 		uprobe_get_process(uproc);
+		up_write(&uproc->rwsem);
 
 		wait_event(uproc->waitq, !utask->quiescing);
@@ -1921,9 +1938,15 @@ static u32 uprobe_report_signal(u32 action,
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
 	BUG_ON(!utask);
-	uproc = utask->uproc;
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+	rcu_read_unlock();
+
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_SIGNAL_IGN | UTRACE_DETACH;
+
 	/*
 	 * We may need to re-assert UTRACE_SINGLESTEP if this signal
 	 * is not associated with the breakpoint.
@@ -1933,9 +1956,6 @@ static u32 uprobe_report_signal(u32 action,
 		resume_action = UTRACE_SINGLESTEP;
 	else
 		resume_action = UTRACE_RESUME;
-	/* Keep uproc intact until just before we return. */
-	uprobe_get_process(uproc);
-
 	if (unlikely(signal_action == UTRACE_SIGNAL_REPORT)) {
 		/* This thread was quiesced using UTRACE_INTERRUPT. */
 		bool done_quiescing;
@@ -2188,7 +2208,7 @@ static u32 uprobe_report_quiesce(
 		return UTRACE_SINGLESTEP;
 
 	BUG_ON(utask->active_probe);
-	uproc = utask->uproc;
+	uproc = uprobe_get_process(utask->uproc);
 	down_write(&uproc->rwsem);
 
 #if 0	// TODO: Is this a concern any more?
@@ -2211,6 +2231,7 @@ static u32 uprobe_report_quiesce(
 	done_quiescing = utask_quiesce(utask);
// done:
 	up_write(&uproc->rwsem);
+	uprobe_put_process(utask->uproc);
 	return (done_quiescing ?
 		UTRACE_RESUME : UTRACE_STOP);
 }
@@ -2295,9 +2316,15 @@ static u32 uprobe_report_exit(enum utrace_resume_action action,
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
+	BUG_ON(!utask);
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+
 	rcu_read_unlock();
 
-	uprobe_get_process(uproc);
+
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_DETACH;
 
 	ppt = utask->active_probe;
 	if (ppt) {
@@ -2626,9 +2653,15 @@ static u32 uprobe_report_exec(
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
+	BUG_ON(!utask);
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+
 	rcu_read_unlock();
 
-	uprobe_get_process(uproc);
+
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_DETACH;
 
 	/*
 	 * Only cleanup if we're the last thread. If we aren't,