Performance monitoring hardware access for SystemTap

public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed

* Performance monitoring hardware access for SystemTap
@ 2006-12-16 10:07 William Cohen
  2006-12-16 10:13 ` Back to backtraces Michael Grundy
  0 siblings, 1 reply; 2+ messages in thread
From: William Cohen @ 2006-12-16 10:07 UTC (permalink / raw)
  To: SystemTAP

There is a great desire to use the performance counters on processors
to gain a better understanding of what is going on in the code.  There
are various mechanisms such as Perfmon2 and Perfctr that provide
infrastructure to access and manage the performance monitoring
hardware.  However, these interfaces are not currently in the mainline
kernel. OProfile is a performance monitoring mechanism currently in
the mainline kernel.

The OProfile kernel code sets up the performance counters to trigger
interrupts on overflow, records the counter that overflowed and the
location of the interrupt, and transports this data to userspace. A
userspace daemon processes these samples and tracks where the
interrupts occur.

OProfile handles a variety of performance monitoring hardware, such as
x86-64, ppc64, and most i386 processors. The OProfile mechanism does
not accumulate event counts. Proposed modifications would allow code,
e.g. SystemTap, to look at these counts.

-have a data structure keep track of the number of interrupts per
counter/processor combination (need to make sure that all counters for
a processor are adjacent in the data structure)

-have an entry in the processor-specific struct that reads the counter,
  like the pseudo code below, and export a call in the oprofile-like
  driver for that function

u64 read_pmd_counter(int counter)

	u64 total;

	/* FIXME check counter reasonable value */

	high1 = read_int_count(counter, processor);
retry:
	low = read_low(counter);
	high 2 = read_int_count(counter, processor);
	if (high1 != high2) goto retry;
	total = adjust_high(high2) + adjust_low(low);
	return total;
}

-thinking about exporting the counter information to
  /dev/oprofile/stats/cpu[0-9]+/[0-9]+ (but this would be an expensive
  way to read the counters)

-systemtap translator
	-extract counter information
	-generate arguments for userspace setup code

-systemtap runtime
	-functions to read the counters

-userspace helper code:
	-pick out the appropriate values
	 and configure the counters appropriately
	-pass which counters counting which events to module

Issues:
	What happens when more than one probe is using the counters?
		there could be enough registers to satisfy both scripts
	Cost of reading value? computing the total needs to be cheap
	     doing an arbitrary 64-bit multiply might not be desirable
	Portability and maintainability?
	Counters are free running and not tied to threads/processes
		What happens when measurement taken across processors?
		e.g. start on one processor finish on another?

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Back to backtraces
  2006-12-16 10:07 Performance monitoring hardware access for SystemTap William Cohen
@ 2006-12-16 10:13 ` Michael Grundy
  0 siblings, 0 replies; 2+ messages in thread
From: Michael Grundy @ 2006-12-16 10:13 UTC (permalink / raw)
  To: SystemTAP

Will's note about oprofile reminded me I had a jprobe module I meant to
inject into the backtrace discussions. I copped the backtrace code for a
jprobe module (included). It will go as far as it can up the chain, and
comparing it with gdb backtraces, it seems pretty reliable.

Thanks
Mike

=========================================
Michael Grundy - grundym@us.ibm.com

Hey Phil, if we wanted to hit mailboxes we could let Ralph drive.




/* backtrace portions (c) 2002 OProfile authors, John Levon, David Smith 
*/

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>
#include <linux/smp_lock.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/oprofile.h>
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/uaccess.h>

/*
 * Command name compared against the exiting task's comm in jdo_exit().
 * Set through the "jprobe_comm" module parameter; init_module() refuses
 * to load while it is still NULL.
 */
char *jprobe_comm;

/*
 * Layout of one stack frame as read from user memory: the saved frame
 * pointer that links to the next frame up the stack, followed by the
 * return address that gets printed in the backtrace.
 */
struct frame_head {
        struct frame_head *ebp;         /* link to the next frame */
        unsigned long ret;              /* return address of this frame */
} __attribute__((packed));


/*
 * Print the return address held in the user-space frame at @head and
 * hand back the next frame to visit.
 *
 * Two frame_head entries are fetched, so the frame following @head must
 * be readable as well.  Returns NULL when the user memory cannot be
 * accessed or copied, or when the saved frame pointer does not lie
 * strictly above @head (which ends the walk).
 */
static struct frame_head *
dump_user_backtrace(struct frame_head * head)
{
        struct frame_head frames[2];
        struct frame_head *next;

        /* Verify and copy this frame plus the one beyond it */
        if (!access_ok(VERIFY_READ, head, sizeof(frames)) ||
            __copy_from_user_inatomic(frames, head, sizeof(frames)))
                return NULL;

        printk("[<%lx>]\n", frames[0].ret);

        /* Only keep walking while frames move towards higher addresses */
        next = frames[0].ebp;
        return (head < next) ? next : NULL;
}

/*
 * Print a frame-pointer based backtrace for the register set @regs.
 *
 * Nothing is printed unless @regs describes user mode.  At most @depth
 * frames are visited; the walk also stops as soon as
 * dump_user_backtrace() returns NULL.  Each line is prefixed with the
 * number of frames still allowed after the current one, so the prefix
 * counts down from @depth - 1.
 */
void x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
        struct frame_head *head;

#ifdef CONFIG_X86_64
        head = (struct frame_head *)regs->rbp;
#else
        head = (struct frame_head *)regs->ebp;
#endif

        if (!user_mode_vm(regs))
                return;

        while (depth-- && head) {
                /* depth is unsigned, so print it with %u rather than %d */
                printk("%3u: ", depth);
                head = dump_user_backtrace(head);
        }
}


int jdo_exit(long code)
{
        struct task_struct *t = current;
        extern char *jprobe_comm;
         /* 
         * dump stack when specified process exits
         */
        if (strcmp(t->comm, jprobe_comm) == 0) {
                printk("process(%d:%s) is exiting\n....", t->pid, 
t->comm);
                printk("exit_code: 0x%lx, flags: 0x%lx\n....", code, 
t->flags);
                if (t->thread_info)
                        printk("thread flags: 0x%lx, status: 0x%lx, cpu: 
%d\n",
                                        t->thread_info->flags, 
                                        t->thread_info->status,
                                        t->thread_info->cpu);
                dump_stack();
                /* ebp is the last reg pushed by switch_to */
                printk("user backtrace\neip: [<%lx>]\n", 
task_pt_regs(t)->eip);
                x86_backtrace(task_pt_regs(t), 30);
        }
        /* Always end with a call to jprobe_return(). */
        jprobe_return();
        /*NOTREACHED*/
        return 0;
}



/*
 * Jprobe descriptor: jdo_exit() is the handler entry point.  The probe
 * address (kp.addr) is filled in by init_module() before registration.
 */
static struct jprobe my_jprobe = {
        .entry = (kprobe_opcode_t *)jdo_exit,
};

int init_module(void)
{
        int ret;
        char *func = "do_exit";

        if (jprobe_comm == NULL) {
                printk("Command to probe not specified.\n");
                return -1;
        }

        my_jprobe.kp.addr = (kprobe_opcode_t *) 
kallsyms_lookup_name(func);
        if (!my_jprobe.kp.addr) {
                printk("Couldn't find %s to plant jprobe\n", func);
                return -1;
        }

        if ((ret = register_jprobe(&my_jprobe)) <0) {
                printk("register_jprobe failed, returned %d\n", ret);
                return -1;
        }

        printk("Planted jprobe at %p, handler addr %p\n",
                my_jprobe.kp.addr, my_jprobe.entry);
        printk("Probing command: %s\n", jprobe_comm);

        return 0;
}

/*
 * Module exit point: unregister the jprobe that init_module() registered
 * and log that it has been removed.
 */
void cleanup_module(void)
{
        unregister_jprobe(&my_jprobe);
        printk("jprobe unregistered\n");
}

/*
 * Module metadata.  jprobe_comm is a string (charp) parameter declared
 * with permissions 0444; it names the command whose exit is reported.
 */
MODULE_LICENSE("GPL");
module_param(jprobe_comm, charp, 0444);
MODULE_PARM_DESC(jprobe_comm, "Name of command to probe.");

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2006-12-15 21:46 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-12-16 10:07 Performance monitoring hardware access for SystemTap William Cohen
2006-12-16 10:13 ` Back to backtraces Michael Grundy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).