perf stat continues running even if the event list contains counters
that are not supported. The resulting output then contains <not counted>
for those events which gets confusing as to which events are supported,
but not counted and which are not supported.
Before:
perf stat -ddd -- sleep 1
Performance counter stats for 'sleep 1':
0.571283 task-clock # 0.001 CPUs utilized
1 context-switches # 0.002 M/sec
0 CPU-migrations # 0.000 M/sec
157 page-faults # 0.275 M/sec
1,037,707 cycles # 1.816 GHz
<not counted> stalled-cycles-frontend
<not counted> stalled-cycles-backend
654,499 instructions # 0.63 insns per cycle
136,129 branches # 238.286 M/sec
<not counted> branch-misses
<not counted> L1-dcache-loads
<not counted> L1-dcache-load-misses
<not counted> LLC-loads
<not counted> LLC-load-misses
<not counted> L1-icache-loads
<not counted> L1-icache-load-misses
<not counted> dTLB-loads
<not counted> dTLB-load-misses
<not counted> iTLB-loads
<not counted> iTLB-load-misses
<not counted> L1-dcache-prefetches
<not counted> L1-dcache-prefetch-misses
1.001004836 seconds time elapsed
After:
perf stat -ddd -- sleep 1
Performance counter stats for 'sleep 1':
1.350326 task-clock # 0.001 CPUs utilized
2 context-switches # 0.001 M/sec
0 CPU-migrations # 0.000 M/sec
157 page-faults # 0.116 M/sec
11,986 cycles # 0.009 GHz
<not supported> stalled-cycles-frontend
<not supported> stalled-cycles-backend
496,986 instructions # 41.46 insns per cycle
138,065 branches # 102.246 M/sec
7,245 branch-misses # 5.25% of all branches
<not counted> L1-dcache-loads
<not counted> L1-dcache-load-misses
<not counted> LLC-loads
<not counted> LLC-load-misses
<not counted> L1-icache-loads
<not counted> L1-icache-load-misses
<not counted> dTLB-loads
<not counted> dTLB-load-misses
<not counted> iTLB-loads
<not counted> iTLB-load-misses
<not counted> L1-dcache-prefetches
<not supported> L1-dcache-prefetch-misses
1.002397333 seconds time elapsed
v1->v2:
changed supported type from int to bool
v2->v3:
fixed vertical alignment of new struct element
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1306767359-13221-1-git-send-email-dsahern@gmail.com
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
160 lines
4.2 KiB
C
160 lines
4.2 KiB
C
#ifndef __PERF_EVSEL_H
|
|
#define __PERF_EVSEL_H 1
|
|
|
|
#include <linux/list.h>
|
|
#include <stdbool.h>
|
|
#include "../../../include/linux/perf_event.h"
|
|
#include "types.h"
|
|
#include "xyarray.h"
|
|
#include "cgroup.h"
|
|
#include "hist.h"
|
|
|
|
/*
 * A single counter reading. @val is the raw count; @ena and @run look
 * like the event's enabled/running times used to scale-adjust
 * multiplexed counters -- NOTE(review): confirm against the read
 * format configured in evsel.c. The anonymous union lets the same
 * storage be read as a 3-element array, matching the kernel read()
 * layout consumed elsewhere.
 */
struct perf_counts_values {
	union {
		struct {
			u64 val;	/* raw counter value */
			u64 ena;	/* enabled time (see note above) */
			u64 run;	/* running time (see note above) */
		};
		u64 values[3];		/* array view of the three fields */
	};
};

/*
 * Counts for one event: the cross-cpu aggregate plus one slot per cpu
 * (flexible array, sized at allocation -- see perf_evsel__alloc_counts).
 * @scaled: whether @aggr was scale-adjusted by the scaled read paths
 *          below; negative values appear to flag a problem --
 *          NOTE(review): verify exact encoding in evsel.c.
 */
struct perf_counts {
	s8			  scaled;
	struct perf_counts_values aggr;		/* sum over all cpus */
	struct perf_counts_values cpu[];	/* per-cpu readings */
};

struct perf_evsel;

/*
 * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
 * more than one entry in the evlist.
 */
struct perf_sample_id {
	struct hlist_node	node;	/* hash-list linkage, bucketed by id */
	u64			id;	/* sample id to resolve */
	struct perf_evsel	*evsel;	/* selector owning this id */
};

/** struct perf_evsel - event selector
 *
 * @name - Can be set to retain the original event name passed by the user,
 *         so that when showing results in tools such as 'perf stat', we
 *         show the name used, not some alias.
 */
struct perf_evsel {
	struct list_head	node;		/* link in the owning evlist */
	struct perf_event_attr	attr;		/* event config handed to the kernel */
	char			*filter;	/* filter string, if any was set */
	struct xyarray		*fd;		/* open fds; 2-D per cpu/thread
						 * (see perf_evsel__alloc_fd) */
	struct xyarray		*sample_id;	/* per-fd perf_sample_id entries */
	u64			*id;		/* sample ids owned by this evsel */
	struct perf_counts	*counts;	/* aggregate + per-cpu readings */
	int			idx;		/* index assigned at creation */
	int			ids;		/* number of valid entries in @id */
	struct hists		hists;		/* histogram state for this event */
	char			*name;		/* see struct comment above */
	union {
		/* overlap is intentional: which member is live depends on
		 * the tool using the evsel -- NOTE(review): confirm users. */
		void		*priv;
		off_t		id_offset;
	};
	struct cgroup_sel	*cgrp;		/* cgroup being monitored, if any */
	bool			supported;	/* event usable on the running
						 * kernel; when false, tools can
						 * report "<not supported>"
						 * instead of "<not counted>" */
};

struct cpu_map;
|
|
struct thread_map;
|
|
struct perf_evlist;
|
|
|
|
/*
 * Lifetime: perf_evsel__new()/perf_evsel__delete() create and tear down
 * a selector; init/exit are the counterparts for caller-owned storage.
 */
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx);
void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);

/* (De)allocate the per-cpu/per-thread fd, sample-id and count arrays. */
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
void perf_evsel__free_id(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);

/*
 * Open the event on the given cpus and/or threads. @group presumably
 * requests event grouping -- NOTE(review): confirm in evsel.c.
 */
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group);
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group);
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group);

/*
 * Check whether @evsel is a given generic event, e.g.
 * perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES).
 * @t is pasted onto PERF_TYPE_ and @c onto PERF_COUNT_, so both must be
 * bare identifier suffixes, not expressions.
 *
 * Fix: parenthesize the @evsel argument. The previous expansion
 * `evsel->attr.type` mis-parsed for non-primary arguments, e.g.
 * perf_evsel__match(&e, ...) expanded to `&e->attr.type`, which binds
 * as `&(e->attr.type)`.
 */
#define perf_evsel__match(evsel, t, c)			\
	((evsel)->attr.type == PERF_TYPE_##t &&		\
	 (evsel)->attr.config == PERF_COUNT_##c)

/*
 * Backend for the read_on_cpu wrappers below; @scale selects the
 * scaled variant (see perf_evsel__read_on_cpu_scaled).
 */
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale);

/**
 * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
 *
 * @evsel - event selector to read value
 * @cpu - CPU of interest
 * @thread - thread of interest
 *
 * Unscaled convenience wrapper around __perf_evsel__read_on_cpu().
 */
static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
					  int cpu, int thread)
{
	const bool scale = false;	/* plain, unscaled read */

	return __perf_evsel__read_on_cpu(evsel, cpu, thread, scale);
}

/**
 * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
 *
 * @evsel - event selector to read value
 * @cpu - CPU of interest
 * @thread - thread of interest
 *
 * Scaled convenience wrapper around __perf_evsel__read_on_cpu().
 */
static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
						 int cpu, int thread)
{
	const bool scale = true;	/* apply ena/run scaling */

	return __perf_evsel__read_on_cpu(evsel, cpu, thread, scale);
}

/*
 * Backend for the aggregate read wrappers below; @scale selects the
 * scaled variant (see perf_evsel__read_scaled).
 */
int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
		       bool scale);

/**
 * perf_evsel__read - Read the aggregate results on all CPUs
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Unscaled convenience wrapper around __perf_evsel__read().
 */
static inline int perf_evsel__read(struct perf_evsel *evsel,
				   int ncpus, int nthreads)
{
	const bool scale = false;	/* plain, unscaled read */

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}

/**
 * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Scaled convenience wrapper around __perf_evsel__read().
 */
static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
					  int ncpus, int nthreads)
{
	const bool scale = true;	/* apply ena/run scaling */

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}

int __perf_evsel__sample_size(u64 sample_type);
|
|
|
|
static inline int perf_evsel__sample_size(struct perf_evsel *evsel)
|
|
{
|
|
return __perf_evsel__sample_size(evsel->attr.sample_type);
|
|
}
|
|
|
|
#endif /* __PERF_EVSEL_H */
|