incorrect perf counters

public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed

* incorrect perf counters
@ 2016-07-25 19:18 riya khanna
  0 siblings, 0 replies; 11+ messages in thread
From: riya khanna @ 2016-07-25 19:18 UTC (permalink / raw)
  To: systemtap

[-- Attachment #1: Type: text/plain, Size: 992 bytes --]

Hi,

I'm trying to collect perf counters using systemtap. I followed
systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
user process, enable counters on function entry, and spew out counters
on function return. Please find the script (perf.stp) attached.

I use the stap script described above to monitor function func()'s
entry and exit.

void func(void)
{
    printf("Measuring instruction count for this printf\n");
}

Just to verify systemtap reported counters, I created a program
(test.c) that uses perf_event_open() syscall to monitor func()'s
entry/exit and spew out counters. Please find it attached as well.

However, the counters reported by perf.stp are very different from
what is reported by test.c. For example, instruction count reported by
test.c and perf.stp are 593 and 10610-10690.

This is how I launch perf.stp to monitor func() in test.c:

sudo /usr/local/bin/stap -s1 perf.stp test func

Why such a difference? Am I missing something?

Thanks,
Riya

[-- Attachment #2: perf.stp --]
[-- Type: application/octet-stream, Size: 1929 bytes --]

#! /usr/bin/env stap
global cycles_per_insn
global branch_per_insn
global cacheref_per_insn
global insns
global cycles
global branches
global cacherefs
global insn

probe perf.hw.instructions.process(@1).counter("printf_insns") {} 
probe perf.hw.cpu_cycles.process(@1).counter("printf_cycles") {} 
probe perf.hw.branch_instructions.process(@1).counter("printf_branches") {} 
probe perf.hw.cache_references.process(@1).counter("printf_cache_refs") {} 

probe process(@1).function(@2)
{
 insn["printf_insns"] = @perf("printf_insns")
 insns <<< (insn["printf_insns"])
 insn["printf_cycles"] = @perf("printf_cycles")
 cycles <<< insn["printf_cycles"]
 insn["printf_branches"] = @perf("printf_branches")
 branches <<< insn["printf_branches"]
 insn["printf_cache_refs"] = @perf("printf_cache_refs")
 cacherefs <<< insn["printf_cache_refs"]
}


probe process(@1).function(@2).return
{
    dividend = (@perf("printf_cycles") - insn["printf_cycles"])
    printf ("cycles = %d\n\n", dividend)
    divisor =  (@perf("printf_insns") - insn["printf_insns"])
    printf ("instructions = %d\n\n", divisor)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cycles_per_insn <<< q

    dividend = (@perf("printf_branches") - insn["printf_branches"])
    printf ("branches = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	branch_per_insn <<< q

    dividend = (@perf("printf_cache_refs") - insn["printf_cache_refs"])
    printf ("cache refs = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cacheref_per_insn <<< q
}

probe end
{
 if (@count(cycles_per_insn)) {
   printf ("Cycles per Insn\n\n")
   print (@hist_log(cycles_per_insn))
 }
 if (@count(branch_per_insn)) {
   printf ("\nBranches per Insn\n\n")
   print (@hist_log(branch_per_insn))
 }
 if (@count(cacheref_per_insn)) {
   printf ("Cache Refs per Insn\n\n")
   print (@hist_log(cacheref_per_insn))
 }
}


[-- Attachment #3: test.c --]
[-- Type: text/x-csrc, Size: 3115 bytes --]

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdint.h>
#include <errno.h>

struct perf_event {
	const char *name;
	uint64_t type;
	uint64_t config;
};

#define NUM_REQ_HW_CNTRS 6
static const struct perf_event events[] =
{
	{ "Cycles",					PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES },
	{ "Instrs",					PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS },
	{ "Cache-Refs",				PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES },
	{ "Cache-Misses",			PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES },
	{ "Branches",				PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
};

static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
               int cpu, int group_fd, unsigned long flags)
{
   int ret;

   ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
                  group_fd, flags);
   return ret;
}

static int open_event(uint64_t config, uint64_t type, int group_leader)
{
  static struct perf_event_attr pe;
  pe.size = sizeof(struct perf_event_attr);
  pe.type = type;
  pe.config = config;
  if (group_leader == -1)
    pe.disabled = 1;
  pe.mmap = 1;
  pe.comm = 1;

  pe.read_format = PERF_FORMAT_GROUP;
  pe.exclude_kernel = 1;
  pe.exclude_hv = 1;

  return perf_event_open(&pe, 0, -1, group_leader, 0);
}

void perf_enable_perf(int fd)
{
	if (fd > 0) {
		ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
		ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	} else
		fprintf(stderr, "enable failed fd: %d", fd);
}

void perf_disable_perf(int fd)
{
	if (fd > 0) {
		ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

		int num_events = sizeof(events) / sizeof(events[0]);
		uint64_t buf[num_events + 1];

		memset(buf, 0, ((num_events + 1) * sizeof(uint64_t)));
		if (read(fd, &buf, ((num_events + 1) * sizeof(uint64_t))) < 0) {
			fprintf(stderr, "fd: %d read failed: %s", fd, strerror(errno));
		} else {
			//fprintf(stdout, "fd: %d nr_events %ld\n", fd, buf[0]);
			int idx;
			for (idx = 0; idx < buf[0]; idx++)
				fprintf(stdout, "%s: %ld\n", events[idx].name, buf[idx + 1]);
		}
	 } else
			fprintf(stderr, "disable failed fd: %d", fd);
}

int perf_init_perf(void)
{
	int idx, fd, group_fd = -1;
	int num_events = sizeof(events) / sizeof(events[0]);

	for (idx = 0; idx < num_events; idx++) {

		fd = open_event(events[idx].config, events[idx].type, group_fd);
		if (fd == -1) {
			fprintf(stderr, "Error opening event %s : %s\n",
					events[idx].name, strerror(errno));
			return -1;
		} else {
			//fprintf(stdout, "event %d open fd %d", idx, fd);
		}

		if (group_fd == -1)
			group_fd = fd;
	}

	return group_fd;
}

void perf_exit_perf(int fd)
{
	if (fd > 0)
		close(fd);
}

void func(void)
{
   	printf("Measuring instruction count for this printf\n");
}

int main(void)
{
#if DISABLE_PERF
	while (1) {
		func();
		sleep(5);
	}
#else
	int fd = perf_init_perf();
	if (fd < 0)
		return -1;

	while (1) {
		perf_enable_perf(fd);
		func();
		perf_disable_perf(fd);
		sleep(5);
	}

	perf_exit_perf(fd);
#endif
	return 0;
}

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-29 16:44               ` riya khanna
@ 2016-07-29 17:00                 ` Josh Stone
  0 siblings, 0 replies; 11+ messages in thread
From: Josh Stone @ 2016-07-29 17:00 UTC (permalink / raw)
  To: riya khanna, Frank Ch. Eigler; +Cc: systemtap

On 07/29/2016 09:44 AM, riya khanna wrote:
> In tapsets.cxx, emit_perf_read_handler() emits
> "_stp_perf_read(smp_processor_id(), i)" to read perf counters values.
> However, for non-systemwide perf counters (i.e.  with stp->system_wide
> = 0), "_stp_perf_init()" calls perf_event_create_kernel_counter() with
> cpu = -1.
> 
> Does this mean that probe perf.xx.process("y").counter("z") will only
> be read on current CPU? Because I see 0s being reported for hw
> counters.

That is strange.  However, if you look at the implementation of
_stp_perf_read, it doesn't actually use that "ncpu" parameter.  Since
this is a thread event, perf itself should be saving and restoring
counter values whenever that thread reschedules, even to other cpus.

However, I'm not sure we're handling multiple threads or multiple
processes correctly.  In _stp_perf_init, it just returns if the
per_thread_event is already configured.  It appears to me that the
stap_perf_probes[] array is a fixed allocation for each counter, so then
whichever task is found first will own it exclusively.

I thought we already had a bug about this, and even fixed it, but I
can't find that information now... :/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-27 23:10             ` riya khanna
@ 2016-07-29 16:44               ` riya khanna
  2016-07-29 17:00                 ` Josh Stone
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-29 16:44 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: Josh Stone, systemtap

In tapsets.cxx, emit_perf_read_handler() emits
"_stp_perf_read(smp_processor_id(), i)" to read perf counters values.
However, for non-systemwide perf counters (i.e.  with stp->system_wide
= 0), "_stp_perf_init()" calls perf_event_create_kernel_counter() with
cpu = -1.

Does this mean that probe perf.xx.process("y").counter("z") will only
be read on current CPU? Because I see 0s being reported for hw
counters.

On Wed, Jul 27, 2016 at 7:10 PM, riya khanna <riyakhanna1983@gmail.com> wrote:
> Thanks!
>
> This is how I'm measuring perf counters delta (attached perf.stp).
> However, this may add unnecessary overhead.
>
> Additionally, perf.stp seems to work correctly when I attach to a
> process name with single PID. However, if I attach to a process name
> (e.g. apache httpd) that has multiple PIDs (e.g. apache workers, some
> spawned at runtime to serve requests), then counters reported are
> incorrect (sometimes 0). "perf stat" tool reports correct counters.
>
> I was wondering if "probe perf" collects per-cpu counters or across
> all CPUs? Is the perf state transferred/saved upon cpu/context switch?
> How can I accomplish this similar to what "perf stat" command line
> tool does?
>
> -Riya
>
> On Wed, Jul 27, 2016 at 11:59 AM, Frank Ch. Eigler <fche@redhat.com> wrote:
>>
>> riyakhanna1983 wrote:
>>
>>> Is is possible to enable/disable counters on a function entry/exit?
>>
>> Enabling/disabling is a relatively large amount of work.  If you just
>> want a *delta* between function entry/exit, you could accumulate that
>> thusly:
>>
>> stap -e '
>> global counts
>> probe perf.sw.cpu_clock.counter("foo") {}
>> probe process.function("*").return { counts <<< @perf("foo") - @entry(@perf("foo")) }
>> ' -c /bin/ls
>>
>> ... but actually it seems we're not implementing @entry(@perf(...))
>> correctly just now, so you may need to do this instead:
>>
>> global counts, zoo%
>> probe perf.sw.cpu_clock.counter("foo") {}
>> probe process.function("main").call { zoo[tid()] = @perf("foo") }
>> probe process.function("main").return { counts <<< @perf("foo") - zoo[tid()] }
>> ' -c /bin/ls
>>
>> - FChE

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-27 15:59           ` Frank Ch. Eigler
@ 2016-07-27 23:10             ` riya khanna
  2016-07-29 16:44               ` riya khanna
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-27 23:10 UTC (permalink / raw)
  To: Frank Ch. Eigler; +Cc: Josh Stone, systemtap

[-- Attachment #1: Type: text/plain, Size: 1567 bytes --]

Thanks!

This is how I'm measuring perf counters delta (attached perf.stp).
However, this may add unnecessary overhead.

Additionally, perf.stp seems to work correctly when I attach to a
process name with single PID. However, if I attach to a process name
(e.g. apache httpd) that has multiple PIDs (e.g. apache workers, some
spawned at runtime to serve requests), then counters reported are
incorrect (sometimes 0). "perf stat" tool reports correct counters.

I was wondering if "probe perf" collects per-cpu counters or across
all CPUs? Is the perf state transferred/saved upon cpu/context switch?
How can I accomplish this similar to what "perf stat" command line
tool does?

-Riya

On Wed, Jul 27, 2016 at 11:59 AM, Frank Ch. Eigler <fche@redhat.com> wrote:
>
> riyakhanna1983 wrote:
>
>> Is is possible to enable/disable counters on a function entry/exit?
>
> Enabling/disabling is a relatively large amount of work.  If you just
> want a *delta* between function entry/exit, you could accumulate that
> thusly:
>
> stap -e '
> global counts
> probe perf.sw.cpu_clock.counter("foo") {}
> probe process.function("*").return { counts <<< @perf("foo") - @entry(@perf("foo")) }
> ' -c /bin/ls
>
> ... but actually it seems we're not implementing @entry(@perf(...))
> correctly just now, so you may need to do this instead:
>
> global counts, zoo%
> probe perf.sw.cpu_clock.counter("foo") {}
> probe process.function("main").call { zoo[tid()] = @perf("foo") }
> probe process.function("main").return { counts <<< @perf("foo") - zoo[tid()] }
> ' -c /bin/ls
>
> - FChE

[-- Attachment #2: perf.stp --]
[-- Type: application/octet-stream, Size: 1929 bytes --]

#! /usr/bin/env stap
global cycles_per_insn
global branch_per_insn
global cacheref_per_insn
global insns
global cycles
global branches
global cacherefs
global insn

probe perf.hw.instructions.process(@1).counter("printf_insns") {} 
probe perf.hw.cpu_cycles.process(@1).counter("printf_cycles") {} 
probe perf.hw.branch_instructions.process(@1).counter("printf_branches") {} 
probe perf.hw.cache_references.process(@1).counter("printf_cache_refs") {} 

probe process(@1).function(@2)
{
 insn["printf_insns"] = @perf("printf_insns")
 insns <<< (insn["printf_insns"])
 insn["printf_cycles"] = @perf("printf_cycles")
 cycles <<< insn["printf_cycles"]
 insn["printf_branches"] = @perf("printf_branches")
 branches <<< insn["printf_branches"]
 insn["printf_cache_refs"] = @perf("printf_cache_refs")
 cacherefs <<< insn["printf_cache_refs"]
}


probe process(@1).function(@2).return
{
    dividend = (@perf("printf_cycles") - insn["printf_cycles"])
    printf ("cycles = %d\n\n", dividend)
    divisor =  (@perf("printf_insns") - insn["printf_insns"])
    printf ("instructions = %d\n\n", divisor)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cycles_per_insn <<< q

    dividend = (@perf("printf_branches") - insn["printf_branches"])
    printf ("branches = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	branch_per_insn <<< q

    dividend = (@perf("printf_cache_refs") - insn["printf_cache_refs"])
    printf ("cache refs = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cacheref_per_insn <<< q
}

probe end
{
 if (@count(cycles_per_insn)) {
   printf ("Cycles per Insn\n\n")
   print (@hist_log(cycles_per_insn))
 }
 if (@count(branch_per_insn)) {
   printf ("\nBranches per Insn\n\n")
   print (@hist_log(branch_per_insn))
 }
 if (@count(cacheref_per_insn)) {
   printf ("Cache Refs per Insn\n\n")
   print (@hist_log(cacheref_per_insn))
 }
}


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-26 20:36         ` riya khanna
@ 2016-07-27 15:59           ` Frank Ch. Eigler
  2016-07-27 23:10             ` riya khanna
  0 siblings, 1 reply; 11+ messages in thread
From: Frank Ch. Eigler @ 2016-07-27 15:59 UTC (permalink / raw)
  To: riya khanna; +Cc: Josh Stone, systemtap


riyakhanna1983 wrote:

> Is is possible to enable/disable counters on a function entry/exit?

Enabling/disabling is a relatively large amount of work.  If you just
want a *delta* between function entry/exit, you could accumulate that
thusly:

stap -e '
global counts
probe perf.sw.cpu_clock.counter("foo") {}
probe process.function("*").return { counts <<< @perf("foo") - @entry(@perf("foo")) }
' -c /bin/ls

... but actually it seems we're not implementing @entry(@perf(...))
correctly just now, so you may need to do this instead:

global counts, zoo%
probe perf.sw.cpu_clock.counter("foo") {}
probe process.function("main").call { zoo[tid()] = @perf("foo") }
probe process.function("main").return { counts <<< @perf("foo") - zoo[tid()] }
' -c /bin/ls

- FChE

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-25 22:52       ` riya khanna
@ 2016-07-26 20:36         ` riya khanna
  2016-07-27 15:59           ` Frank Ch. Eigler
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-26 20:36 UTC (permalink / raw)
  To: Josh Stone; +Cc: systemtap

Is is possible to enable/disable counters on a function entry/exit?

On Mon, Jul 25, 2016 at 6:52 PM, riya khanna <riyakhanna1983@gmail.com> wrote:
> Thanks! changing attr in runtime/linux/perf.c fixes the problem.
>
> On Mon, Jul 25, 2016 at 5:08 PM, Josh Stone <jistone@redhat.com> wrote:
>> On 07/25/2016 01:55 PM, riya khanna wrote:
>>> Thanks!
>>>
>>> Is there a quick way to do that? Perhaps modify tapset? I make
>>> necessary changes and test.
>>
>> For a quick hack to see if it gets values you like, you could tweak
>> stp->attr in runtime/linux/perf.c, just before the calls to
>> perf_event_create_kernel_counter().  You don't need to recompile
>> systemtap for this, as runtime code is freshly compiled into the module
>> generated for your script.
>>
>> A more permanent solution would affect how the .attr is written in
>> tapset-perfmon.cxx, perf_derived_probe_group::emit_module_decls().
>>
>>> On Mon, Jul 25, 2016 at 3:45 PM, Josh Stone <jistone@redhat.com> wrote:
>>>> On 07/25/2016 12:15 PM, riya khanna wrote:
>>>>> Hi,
>>>>>
>>>>> I'm trying to collect perf counters using systemtap. I followed
>>>>> systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
>>>>> user process, enable counters on function entry, and spew out counters
>>>>> on function return. Please find the script (perf.stp) attached.
>>>>>
>>>>> I use the stap script described above to monitor function func()'s
>>>>> entry and exit.
>>>>>
>>>>> void func(void)
>>>>> {
>>>>>     printf("Measuring instruction count for this printf\n");
>>>>> }
>>>>>
>>>>> Just to verify systemtap reported counters, I created a program
>>>>> (test.c) that uses perf_event_open() syscall to monitor func()'s
>>>>> entry/exit and spew out counters. Please find it attached as well.
>>>>>
>>>>> However, the counters reported by perf.stp are very different from
>>>>> what is reported by test.c. For example, instruction count reported by
>>>>> test.c and perf.stp are 593 and 10610-10690.
>>>>>
>>>>> This is how I launch perf.stp to monitor func() in test.c:
>>>>>
>>>>> sudo /usr/local/bin/stap -s1 perf.stp test func
>>>>>
>>>>> Why such a difference? Am I missing something?
>>>>
>>>> I suspect it's because your calls set exclude_kernel and exclude_hv, and
>>>> systemtap doesn't do this.
>>>>
>>>> Perhaps we should do this for the .process variant, or at least make it
>>>> an option to count only userspace.
>>>>
>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-25 21:08     ` Josh Stone
@ 2016-07-25 22:52       ` riya khanna
  2016-07-26 20:36         ` riya khanna
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-25 22:52 UTC (permalink / raw)
  To: Josh Stone; +Cc: systemtap

Thanks! changing attr in runtime/linux/perf.c fixes the problem.

On Mon, Jul 25, 2016 at 5:08 PM, Josh Stone <jistone@redhat.com> wrote:
> On 07/25/2016 01:55 PM, riya khanna wrote:
>> Thanks!
>>
>> Is there a quick way to do that? Perhaps modify tapset? I make
>> necessary changes and test.
>
> For a quick hack to see if it gets values you like, you could tweak
> stp->attr in runtime/linux/perf.c, just before the calls to
> perf_event_create_kernel_counter().  You don't need to recompile
> systemtap for this, as runtime code is freshly compiled into the module
> generated for your script.
>
> A more permanent solution would affect how the .attr is written in
> tapset-perfmon.cxx, perf_derived_probe_group::emit_module_decls().
>
>> On Mon, Jul 25, 2016 at 3:45 PM, Josh Stone <jistone@redhat.com> wrote:
>>> On 07/25/2016 12:15 PM, riya khanna wrote:
>>>> Hi,
>>>>
>>>> I'm trying to collect perf counters using systemtap. I followed
>>>> systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
>>>> user process, enable counters on function entry, and spew out counters
>>>> on function return. Please find the script (perf.stp) attached.
>>>>
>>>> I use the stap script described above to monitor function func()'s
>>>> entry and exit.
>>>>
>>>> void func(void)
>>>> {
>>>>     printf("Measuring instruction count for this printf\n");
>>>> }
>>>>
>>>> Just to verify systemtap reported counters, I created a program
>>>> (test.c) that uses perf_event_open() syscall to monitor func()'s
>>>> entry/exit and spew out counters. Please find it attached as well.
>>>>
>>>> However, the counters reported by perf.stp are very different from
>>>> what is reported by test.c. For example, instruction count reported by
>>>> test.c and perf.stp are 593 and 10610-10690.
>>>>
>>>> This is how I launch perf.stp to monitor func() in test.c:
>>>>
>>>> sudo /usr/local/bin/stap -s1 perf.stp test func
>>>>
>>>> Why such a difference? Am I missing something?
>>>
>>> I suspect it's because your calls set exclude_kernel and exclude_hv, and
>>> systemtap doesn't do this.
>>>
>>> Perhaps we should do this for the .process variant, or at least make it
>>> an option to count only userspace.
>>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-25 20:55   ` riya khanna
@ 2016-07-25 21:08     ` Josh Stone
  2016-07-25 22:52       ` riya khanna
  0 siblings, 1 reply; 11+ messages in thread
From: Josh Stone @ 2016-07-25 21:08 UTC (permalink / raw)
  To: riya khanna; +Cc: systemtap

On 07/25/2016 01:55 PM, riya khanna wrote:
> Thanks!
> 
> Is there a quick way to do that? Perhaps modify tapset? I make
> necessary changes and test.

For a quick hack to see if it gets values you like, you could tweak
stp->attr in runtime/linux/perf.c, just before the calls to
perf_event_create_kernel_counter().  You don't need to recompile
systemtap for this, as runtime code is freshly compiled into the module
generated for your script.

A more permanent solution would affect how the .attr is written in
tapset-perfmon.cxx, perf_derived_probe_group::emit_module_decls().

> On Mon, Jul 25, 2016 at 3:45 PM, Josh Stone <jistone@redhat.com> wrote:
>> On 07/25/2016 12:15 PM, riya khanna wrote:
>>> Hi,
>>>
>>> I'm trying to collect perf counters using systemtap. I followed
>>> systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
>>> user process, enable counters on function entry, and spew out counters
>>> on function return. Please find the script (perf.stp) attached.
>>>
>>> I use the stap script described above to monitor function func()'s
>>> entry and exit.
>>>
>>> void func(void)
>>> {
>>>     printf("Measuring instruction count for this printf\n");
>>> }
>>>
>>> Just to verify systemtap reported counters, I created a program
>>> (test.c) that uses perf_event_open() syscall to monitor func()'s
>>> entry/exit and spew out counters. Please find it attached as well.
>>>
>>> However, the counters reported by perf.stp are very different from
>>> what is reported by test.c. For example, instruction count reported by
>>> test.c and perf.stp are 593 and 10610-10690.
>>>
>>> This is how I launch perf.stp to monitor func() in test.c:
>>>
>>> sudo /usr/local/bin/stap -s1 perf.stp test func
>>>
>>> Why such a difference? Am I missing something?
>>
>> I suspect it's because your calls set exclude_kernel and exclude_hv, and
>> systemtap doesn't do this.
>>
>> Perhaps we should do this for the .process variant, or at least make it
>> an option to count only userspace.
>>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-25 19:45 ` Josh Stone
@ 2016-07-25 20:55   ` riya khanna
  2016-07-25 21:08     ` Josh Stone
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-25 20:55 UTC (permalink / raw)
  To: Josh Stone; +Cc: systemtap

Thanks!

Is there a quick way to do that? Perhaps modify tapset? I make
necessary changes and test.

On Mon, Jul 25, 2016 at 3:45 PM, Josh Stone <jistone@redhat.com> wrote:
> On 07/25/2016 12:15 PM, riya khanna wrote:
>> Hi,
>>
>> I'm trying to collect perf counters using systemtap. I followed
>> systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
>> user process, enable counters on function entry, and spew out counters
>> on function return. Please find the script (perf.stp) attached.
>>
>> I use the stap script described above to monitor function func()'s
>> entry and exit.
>>
>> void func(void)
>> {
>>     printf("Measuring instruction count for this printf\n");
>> }
>>
>> Just to verify systemtap reported counters, I created a program
>> (test.c) that uses perf_event_open() syscall to monitor func()'s
>> entry/exit and spew out counters. Please find it attached as well.
>>
>> However, the counters reported by perf.stp are very different from
>> what is reported by test.c. For example, instruction count reported by
>> test.c and perf.stp are 593 and 10610-10690.
>>
>> This is how I launch perf.stp to monitor func() in test.c:
>>
>> sudo /usr/local/bin/stap -s1 perf.stp test func
>>
>> Why such a difference? Am I missing something?
>
> I suspect it's because your calls set exclude_kernel and exclude_hv, and
> systemtap doesn't do this.
>
> Perhaps we should do this for the .process variant, or at least make it
> an option to count only userspace.
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: incorrect perf counters
  2016-07-25 19:15 riya khanna
@ 2016-07-25 19:45 ` Josh Stone
  2016-07-25 20:55   ` riya khanna
  0 siblings, 1 reply; 11+ messages in thread
From: Josh Stone @ 2016-07-25 19:45 UTC (permalink / raw)
  To: systemtap

On 07/25/2016 12:15 PM, riya khanna wrote:
> Hi,
> 
> I'm trying to collect perf counters using systemtap. I followed
> systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
> user process, enable counters on function entry, and spew out counters
> on function return. Please find the script (perf.stp) attached.
> 
> I use the stap script described above to monitor function func()'s
> entry and exit.
> 
> void func(void)
> {
>     printf("Measuring instruction count for this printf\n");
> }
> 
> Just to verify systemtap reported counters, I created a program
> (test.c) that uses perf_event_open() syscall to monitor func()'s
> entry/exit and spew out counters. Please find it attached as well.
> 
> However, the counters reported by perf.stp are very different from
> what is reported by test.c. For example, instruction count reported by
> test.c and perf.stp are 593 and 10610-10690.
> 
> This is how I launch perf.stp to monitor func() in test.c:
> 
> sudo /usr/local/bin/stap -s1 perf.stp test func
> 
> Why such a difference? Am I missing something?

I suspect it's because your calls set exclude_kernel and exclude_hv, and
systemtap doesn't do this.

Perhaps we should do this for the .process variant, or at least make it
an option to count only userspace.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* incorrect perf counters
@ 2016-07-25 19:15 riya khanna
  2016-07-25 19:45 ` Josh Stone
  0 siblings, 1 reply; 11+ messages in thread
From: riya khanna @ 2016-07-25 19:15 UTC (permalink / raw)
  To: systemtap

[-- Attachment #1: Type: text/plain, Size: 992 bytes --]

Hi,

I'm trying to collect perf counters using systemtap. I followed
systemtap/EXAMPLES/profiling/perf.stp to create a script to monitor a
user process, enable counters on function entry, and spew out counters
on function return. Please find the script (perf.stp) attached.

I use the stap script described above to monitor function func()'s
entry and exit.

void func(void)
{
    printf("Measuring instruction count for this printf\n");
}

Just to verify systemtap reported counters, I created a program
(test.c) that uses perf_event_open() syscall to monitor func()'s
entry/exit and spew out counters. Please find it attached as well.

However, the counters reported by perf.stp are very different from
what is reported by test.c. For example, instruction count reported by
test.c and perf.stp are 593 and 10610-10690.

This is how I launch perf.stp to monitor func() in test.c:

sudo /usr/local/bin/stap -s1 perf.stp test func

Why such a difference? Am I missing something?

Thanks,
Riya

[-- Attachment #2: perf.stp --]
[-- Type: application/octet-stream, Size: 1929 bytes --]

#! /usr/bin/env stap
global cycles_per_insn
global branch_per_insn
global cacheref_per_insn
global insns
global cycles
global branches
global cacherefs
global insn

probe perf.hw.instructions.process(@1).counter("printf_insns") {} 
probe perf.hw.cpu_cycles.process(@1).counter("printf_cycles") {} 
probe perf.hw.branch_instructions.process(@1).counter("printf_branches") {} 
probe perf.hw.cache_references.process(@1).counter("printf_cache_refs") {} 

probe process(@1).function(@2)
{
 insn["printf_insns"] = @perf("printf_insns")
 insns <<< (insn["printf_insns"])
 insn["printf_cycles"] = @perf("printf_cycles")
 cycles <<< insn["printf_cycles"]
 insn["printf_branches"] = @perf("printf_branches")
 branches <<< insn["printf_branches"]
 insn["printf_cache_refs"] = @perf("printf_cache_refs")
 cacherefs <<< insn["printf_cache_refs"]
}


probe process(@1).function(@2).return
{
    dividend = (@perf("printf_cycles") - insn["printf_cycles"])
    printf ("cycles = %d\n\n", dividend)
    divisor =  (@perf("printf_insns") - insn["printf_insns"])
    printf ("instructions = %d\n\n", divisor)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cycles_per_insn <<< q

    dividend = (@perf("printf_branches") - insn["printf_branches"])
    printf ("branches = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	branch_per_insn <<< q

    dividend = (@perf("printf_cache_refs") - insn["printf_cache_refs"])
    printf ("cache refs = %d\n\n", dividend)
    q = (divisor > 0 ? dividend / divisor : 0)
    if (q > 0)
	cacheref_per_insn <<< q
}

probe end
{
 if (@count(cycles_per_insn)) {
   printf ("Cycles per Insn\n\n")
   print (@hist_log(cycles_per_insn))
 }
 if (@count(branch_per_insn)) {
   printf ("\nBranches per Insn\n\n")
   print (@hist_log(branch_per_insn))
 }
 if (@count(cacheref_per_insn)) {
   printf ("Cache Refs per Insn\n\n")
   print (@hist_log(cacheref_per_insn))
 }
}


[-- Attachment #3: test.c --]
[-- Type: text/x-csrc, Size: 3115 bytes --]

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdint.h>
#include <errno.h>

struct perf_event {
	const char *name;
	uint64_t type;
	uint64_t config;
};

#define NUM_REQ_HW_CNTRS 6
static const struct perf_event events[] =
{
	{ "Cycles",					PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES },
	{ "Instrs",					PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS },
	{ "Cache-Refs",				PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES },
	{ "Cache-Misses",			PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES },
	{ "Branches",				PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
};

static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
               int cpu, int group_fd, unsigned long flags)
{
   int ret;

   ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
                  group_fd, flags);
   return ret;
}

static int open_event(uint64_t config, uint64_t type, int group_leader)
{
  static struct perf_event_attr pe;
  pe.size = sizeof(struct perf_event_attr);
  pe.type = type;
  pe.config = config;
  if (group_leader == -1)
    pe.disabled = 1;
  pe.mmap = 1;
  pe.comm = 1;

  pe.read_format = PERF_FORMAT_GROUP;
  pe.exclude_kernel = 1;
  pe.exclude_hv = 1;

  return perf_event_open(&pe, 0, -1, group_leader, 0);
}

void perf_enable_perf(int fd)
{
	if (fd > 0) {
		ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
		ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	} else
		fprintf(stderr, "enable failed fd: %d", fd);
}

void perf_disable_perf(int fd)
{
	if (fd > 0) {
		ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

		int num_events = sizeof(events) / sizeof(events[0]);
		uint64_t buf[num_events + 1];

		memset(buf, 0, ((num_events + 1) * sizeof(uint64_t)));
		if (read(fd, &buf, ((num_events + 1) * sizeof(uint64_t))) < 0) {
			fprintf(stderr, "fd: %d read failed: %s", fd, strerror(errno));
		} else {
			//fprintf(stdout, "fd: %d nr_events %ld\n", fd, buf[0]);
			int idx;
			for (idx = 0; idx < buf[0]; idx++)
				fprintf(stdout, "%s: %ld\n", events[idx].name, buf[idx + 1]);
		}
	 } else
			fprintf(stderr, "disable failed fd: %d", fd);
}

int perf_init_perf(void)
{
	int idx, fd, group_fd = -1;
	int num_events = sizeof(events) / sizeof(events[0]);

	for (idx = 0; idx < num_events; idx++) {

		fd = open_event(events[idx].config, events[idx].type, group_fd);
		if (fd == -1) {
			fprintf(stderr, "Error opening event %s : %s\n",
					events[idx].name, strerror(errno));
			return -1;
		} else {
			//fprintf(stdout, "event %d open fd %d", idx, fd);
		}

		if (group_fd == -1)
			group_fd = fd;
	}

	return group_fd;
}

void perf_exit_perf(int fd)
{
	if (fd > 0)
		close(fd);
}

void func(void)
{
   	printf("Measuring instruction count for this printf\n");
}

int main(void)
{
#if DISABLE_PERF
	while (1) {
		func();
		sleep(5);
	}
#else
	int fd = perf_init_perf();
	if (fd < 0)
		return -1;

	while (1) {
		perf_enable_perf(fd);
		func();
		perf_disable_perf(fd);
		sleep(5);
	}

	perf_exit_perf(fd);
#endif
	return 0;
}

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2016-07-29 17:00 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-07-25 19:18 incorrect perf counters riya khanna
  -- strict thread matches above, loose matches on Subject: below --
2016-07-25 19:15 riya khanna
2016-07-25 19:45 ` Josh Stone
2016-07-25 20:55   ` riya khanna
2016-07-25 21:08     ` Josh Stone
2016-07-25 22:52       ` riya khanna
2016-07-26 20:36         ` riya khanna
2016-07-27 15:59           ` Frank Ch. Eigler
2016-07-27 23:10             ` riya khanna
2016-07-29 16:44               ` riya khanna
2016-07-29 17:00                 ` Josh Stone

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).