public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [RFC] Support for performance event sampling
@ 2010-02-18 16:29 William Cohen
  2010-02-24 23:17 ` Josh Stone
  0 siblings, 1 reply; 2+ messages in thread
From: William Cohen @ 2010-02-18 16:29 UTC (permalink / raw)
  To: systemtap

[-- Attachment #1: Type: text/plain, Size: 1178 bytes --]

I have been working on PR909 and have revised the perf.c and perf.h files in the 
runtime to make use of the new performance event kernel API. Given the extent of 
the changes, it seems easier to send the complete files rather than patches 
against the old versions.

The perf_test2.stp is a simple example that is compiled using guru mode (-g) to 
exercise the runtime. Because of the permission checks that the performance event 
kernel API makes, the script will need to be run as root.

One difficulty is that perf_event_create_kernel_counter() allocates memory for 
the event descriptor and returns a pointer to that structure. When the 
performance event overflows, the callback function has the following prototype:

int perf_event_overflow(struct perf_event *event, int nmi,
			  struct perf_sample_data *data,
			  struct pt_regs *regs)

For hrtimer probes, one function is used to dispatch all of the hrtimer-related 
events. Unfortunately, the approach used for the hrtimer won't work here. The 
hrtimer probes embed the hrtimer struct in a struct that includes the other 
useful information; since the perf_event struct is allocated by the kernel, it 
cannot be embedded that way. So the systemtap translator will need to generate a 
unique callback for each performance event.

-Will

[-- Attachment #2: perf.h --]
[-- Type: text/x-chdr, Size: 892 bytes --]

/* -*- linux-c -*- 
 * Perf Header File
 * Copyright (C) 2006 Red Hat Inc.
 * Copyright (C) 2010 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _PERF_H_
#define _PERF_H_

/** @file perf.h
 * @brief Header file for performance monitoring hardware support
 */

/* State kept for one performance event on one cpu. */
typedef struct {
	/* counter handle stored by _stp_perf_init(); NULL when none exists */
	struct perf_event *event;
	/* associated probe point */
	const char *pp;
	/* probe handler */
	void (*ph) (struct context *);
} perfcpu;

/* Descriptor handed back by _stp_perf_init() and consumed by _stp_perf_del(). */
typedef struct {
	/* per-cpu data. allocated with _stp_alloc_percpu() */
	perfcpu *pd;
} Perf;

/* Set up sampling of the event described by attr; callback is invoked
 * when the event overflows, pp and ph are recorded with the per-cpu data.
 * Returns NULL on failure. */
static Perf *_stp_perf_init (struct perf_event_attr *attr,
			     perf_overflow_handler_t callback,
			     const char *pp,
			     void (*ph) (struct context *) );

/* Shut down sampling and free the descriptor; a NULL pe is ignored. */
static void _stp_perf_del (Perf *pe);

#endif /* _PERF_H_ */

[-- Attachment #3: perf.c --]
[-- Type: text/x-csrc, Size: 2255 bytes --]

/* -*- linux-c -*- 
 * Perf Functions
 * Copyright (C) 2006 Red Hat Inc.
 * Copyright (C) 2010 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _PERF_C_
#define _PERF_C_

#include <linux/perf_event.h>

#include "perf.h"

/** @file perf.c
 * @brief Implements performance monitoring hardware support
 */

/** Initialize performance sampling
 * Call this during probe initialization to set up performance event sampling.
 * One counter is created for every cpu visited by stp_for_each_cpu().
 *
 * @param attr description of event to sample
 * @param callback function to call when perf event overflows
 * @param pp associated probe point
 * @param ph probe handler
 * @return the new descriptor, or NULL if an allocation or the creation of
 * any counter failed; in that case everything set up so far is released
 */
static Perf *_stp_perf_init (struct perf_event_attr *attr,
			     perf_overflow_handler_t callback,
			     const char *pp, void (*ph) (struct context *) )
{
	int cpu;
	Perf *pe;

	pe = (Perf *) _stp_kmalloc (sizeof(*pe));
	if (pe == NULL)
		return NULL;

	/* allocate space for the event descriptor for each cpu */
	pe->pd = (perfcpu *) _stp_alloc_percpu (sizeof(perfcpu));
	if (pe->pd == NULL)
		goto exit1;

	/*
	 * Fill in every per-cpu slot before any counter exists.  The error
	 * path below walks all cpus and must only ever see NULL or a valid
	 * event, and pp/ph must already be in place in case the overflow
	 * callback fires as soon as a counter has been created.
	 */
	stp_for_each_cpu(cpu) {
		perfcpu *pd = per_cpu_ptr (pe->pd, cpu);

		pd->event = NULL;
		pd->pp = pp;
		pd->ph = ph;
	}

	/* initialize event on each processor */
	stp_for_each_cpu(cpu) {
		perfcpu *pd = per_cpu_ptr (pe->pd, cpu);
		struct perf_event *event;

		event = perf_event_create_kernel_counter(attr, cpu, -1,
							 callback);
		/* on failure the slot keeps its NULL, so cleanup skips it */
		if (IS_ERR(event))
			goto exit2;
		pd->event = event;
	}
	return pe;

exit2:
	/* release the counters that were created before the failure */
	stp_for_each_cpu(cpu) {
		perfcpu *pd = per_cpu_ptr (pe->pd, cpu);

		if (pd->event)
			perf_event_release_kernel(pd->event);
	}
	_stp_free_percpu(pe->pd);
exit1:
	_stp_kfree(pe);
	return NULL;
}

/** Tear down performance event sampling.
 * Releases the counter of every cpu that has one, then frees the per-cpu
 * data and the descriptor itself.  A NULL descriptor is ignored.
 *
 * @param pe descriptor obtained from _stp_perf_init(), may be NULL
 */
static void _stp_perf_del (Perf *pe)
{
	int cpu;

	if (pe == NULL)
		return;

	/* release whichever per-cpu counters exist */
	stp_for_each_cpu(cpu) {
		struct perf_event *ev = per_cpu_ptr (pe->pd, cpu)->event;

		if (ev != NULL)
			perf_event_release_kernel(ev);
	}

	_stp_free_percpu (pe->pd);
	_stp_kfree (pe);
}

#endif /* _PERF_C_ */

[-- Attachment #4: perf_test2.stp --]
[-- Type: text/plain, Size: 1065 bytes --]

#! /usr/local/bin/stap -g -p4
# simple script to test out the perf runtime libraries


%{
#include "perf.c"

/* Event description passed to _stp_perf_init() by perf_start. */
static struct perf_event_attr cycles_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	/* NOTE(review): the nested braces presumably initialize the unnamed
	 * member of perf_event_attr that holds sample_period - confirm
	 * against linux/perf_event.h */
	{.sample_period	= 5000000}
};

/* descriptor returned by _stp_perf_init(); set in perf_start and
 * handed to _stp_perf_del() in perf_stop */
static Perf *ppp;

/* one counter per cpu, incremented by sample_event_handler and
 * printed by perf_stop */
static atomic_t s_count[NR_CPUS];

/* Overflow callback for the test: bump the tally of the cpu it runs on. */
static void sample_event_handler(struct perf_event *e, int nmi,
			       struct perf_sample_data *data,
			       struct pt_regs *regs)
{
	atomic_inc(&s_count[smp_processor_id()]);
}

%}

function perf_start:long ()
%{
	/* Start cycle sampling.  Returns 1 when sampling is set up and 0
	 * when _stp_perf_init() failed.  If a descriptor already exists it
	 * is kept, so that a repeated call cannot overwrite and leak it. */
	if (ppp == NULL)
		ppp = _stp_perf_init (&cycles_attr, sample_event_handler,
				      NULL, NULL);
	THIS->__retvalue = (ppp != NULL);
%}

function ppp_value:long ()
%{
	/* Return the descriptor pointer as an integer (0 when there is none).
	 * Convert through long first so the pointer-to-integer cast has
	 * matching width on 32-bit kernels as well. */
	THIS->__retvalue = (int64_t) (long) ppp;
%}

function perf_stop:long ()
%{
	int cpu;

	/* report how many overflow callbacks each cpu counted */
	stp_for_each_cpu(cpu){
		printk(KERN_INFO "sample_event_handler cpu%d count %d\n",
				 cpu, atomic_read(&(s_count[cpu])));
	}
	/* shut down performance event sampling */
	_stp_perf_del (ppp);
	/* the descriptor is freed now; forget it so that a repeated call
	 * or a later ppp_value() does not touch freed memory */
	ppp = NULL;
	THIS->__retvalue = 0;
%}

# start sampling when the script starts and show the descriptor address
probe begin
{
	perf_start()
	printf("ppp = %p\n", ppp_value())
}

# print the per-cpu counts and stop sampling when the script ends
probe end
{
	perf_stop()
}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC] Support for performance event sampling
  2010-02-18 16:29 [RFC] Support for performance event sampling William Cohen
@ 2010-02-24 23:17 ` Josh Stone
  0 siblings, 0 replies; 2+ messages in thread
From: Josh Stone @ 2010-02-24 23:17 UTC (permalink / raw)
  To: William Cohen; +Cc: systemtap

On 02/18/2010 08:27 AM, William Cohen wrote:
> Due to check that the performance event kernel api makes the script
> will need to be run as root.

Or "echo 0 >/proc/sys/kernel/perf_counter_paranoid", but that opens it
up for everyone...

That's only for cpu-global counters though.  We could also use
task_finder and attach to user-owned processes, which requires just
ptrace-equivalent privilege.

> One difficulty is the perf_event_create_kernel_counter() allocates
> memory for the event descriptor and returns a pointer to that
> structure. [...] So the systemtap translator will need to generate a 
> unique callback for each performance event.

You have the saved perf_event*, so a shared callback could do a linear
scan to find the matching probepoint.

Josh

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2010-02-24 23:17 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-18 16:29 [RFC] Support for performance event sampling William Cohen
2010-02-24 23:17 ` Josh Stone

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).