perf evsel: Steal the counter reading routines from stat
Making them hopefully generic enough to be used in 'perf test', we'll see. Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Mike Galbraith <efault@gmx.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Tom Zanussi <tzanussi@gmail.com> LKML-Reference: <new-submission> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
		
					parent
					
						
							
								70d544d057
							
						
					
				
			
			
				commit
				
					
						c52b12ed25
					
				
			
		
					 3 changed files with 196 additions and 92 deletions
				
			
		|  | @ -93,12 +93,6 @@ static const char		*cpu_list; | |||
| static const char		*csv_sep			= NULL; | ||||
| static bool			csv_output			= false; | ||||
| 
 | ||||
| struct cpu_counts { | ||||
| 	u64 val; | ||||
| 	u64 ena; | ||||
| 	u64 run; | ||||
| }; | ||||
| 
 | ||||
| static volatile int done = 0; | ||||
| 
 | ||||
| struct stats | ||||
|  | @ -108,15 +102,11 @@ struct stats | |||
| 
 | ||||
| struct perf_stat { | ||||
| 	struct stats	  res_stats[3]; | ||||
| 	int		  scaled; | ||||
| 	struct cpu_counts cpu_counts[]; | ||||
| }; | ||||
| 
 | ||||
| static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus) | ||||
| static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) | ||||
| { | ||||
| 	size_t priv_size = (sizeof(struct perf_stat) + | ||||
| 			    (ncpus * sizeof(struct cpu_counts))); | ||||
| 	evsel->priv = zalloc(priv_size); | ||||
| 	evsel->priv = zalloc(sizeof(struct perf_stat)); | ||||
| 	return evsel->priv == NULL ? -ENOMEM : 0; | ||||
| } | ||||
| 
 | ||||
|  | @ -238,52 +228,14 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
|  * Read out the results of a single counter: | ||||
|  * aggregate counts across CPUs in system-wide mode | ||||
|  */ | ||||
| static void read_counter_aggr(struct perf_evsel *counter) | ||||
| static int read_counter_aggr(struct perf_evsel *counter) | ||||
| { | ||||
| 	struct perf_stat *ps = counter->priv; | ||||
| 	u64 count[3], single_count[3]; | ||||
| 	int cpu; | ||||
| 	size_t res, nv; | ||||
| 	int scaled; | ||||
| 	int i, thread; | ||||
| 	u64 *count = counter->counts->aggr.values; | ||||
| 	int i; | ||||
| 
 | ||||
| 	count[0] = count[1] = count[2] = 0; | ||||
| 
 | ||||
| 	nv = scale ? 3 : 1; | ||||
| 	for (cpu = 0; cpu < nr_cpus; cpu++) { | ||||
| 		for (thread = 0; thread < thread_num; thread++) { | ||||
| 			if (FD(counter, cpu, thread) < 0) | ||||
| 				continue; | ||||
| 
 | ||||
| 			res = read(FD(counter, cpu, thread), | ||||
| 					single_count, nv * sizeof(u64)); | ||||
| 			assert(res == nv * sizeof(u64)); | ||||
| 
 | ||||
| 			close(FD(counter, cpu, thread)); | ||||
| 			FD(counter, cpu, thread) = -1; | ||||
| 
 | ||||
| 			count[0] += single_count[0]; | ||||
| 			if (scale) { | ||||
| 				count[1] += single_count[1]; | ||||
| 				count[2] += single_count[2]; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	scaled = 0; | ||||
| 	if (scale) { | ||||
| 		if (count[2] == 0) { | ||||
| 			ps->scaled = -1; | ||||
| 			count[0] = 0; | ||||
| 			return; | ||||
| 		} | ||||
| 
 | ||||
| 		if (count[2] < count[1]) { | ||||
| 			ps->scaled = 1; | ||||
| 			count[0] = (unsigned long long) | ||||
| 				((double)count[0] * count[1] / count[2] + 0.5); | ||||
| 		} | ||||
| 	} | ||||
| 	if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0) | ||||
| 		return -1; | ||||
| 
 | ||||
| 	for (i = 0; i < 3; i++) | ||||
| 		update_stats(&ps->res_stats[i], count[i]); | ||||
|  | @ -302,46 +254,24 @@ static void read_counter_aggr(struct perf_evsel *counter) | |||
| 		update_stats(&runtime_cycles_stats[0], count[0]); | ||||
| 	if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||||
| 		update_stats(&runtime_branches_stats[0], count[0]); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Read out the results of a single counter: | ||||
|  * do not aggregate counts across CPUs in system-wide mode | ||||
|  */ | ||||
| static void read_counter(struct perf_evsel *counter) | ||||
| static int read_counter(struct perf_evsel *counter) | ||||
| { | ||||
| 	struct cpu_counts *cpu_counts = counter->priv; | ||||
| 	u64 count[3]; | ||||
| 	u64 *count; | ||||
| 	int cpu; | ||||
| 	size_t res, nv; | ||||
| 
 | ||||
| 	count[0] = count[1] = count[2] = 0; | ||||
| 
 | ||||
| 	nv = scale ? 3 : 1; | ||||
| 
 | ||||
| 	for (cpu = 0; cpu < nr_cpus; cpu++) { | ||||
| 		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) | ||||
| 			return -1; | ||||
| 
 | ||||
| 		if (FD(counter, cpu, 0) < 0) | ||||
| 			continue; | ||||
| 
 | ||||
| 		res = read(FD(counter, cpu, 0), count, nv * sizeof(u64)); | ||||
| 
 | ||||
| 		assert(res == nv * sizeof(u64)); | ||||
| 
 | ||||
| 		close(FD(counter, cpu, 0)); | ||||
| 		FD(counter, cpu, 0) = -1; | ||||
| 
 | ||||
| 		if (scale) { | ||||
| 			if (count[2] == 0) { | ||||
| 				count[0] = 0; | ||||
| 			} else if (count[2] < count[1]) { | ||||
| 				count[0] = (unsigned long long) | ||||
| 				((double)count[0] * count[1] / count[2] + 0.5); | ||||
| 			} | ||||
| 		} | ||||
| 		cpu_counts[cpu].val = count[0]; /* scaled count */ | ||||
| 		cpu_counts[cpu].ena = count[1]; | ||||
| 		cpu_counts[cpu].run = count[2]; | ||||
| 		count = counter->counts->cpu[cpu].values; | ||||
| 
 | ||||
| 		if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) | ||||
| 			update_stats(&runtime_nsecs_stats[cpu], count[0]); | ||||
|  | @ -350,6 +280,8 @@ static void read_counter(struct perf_evsel *counter) | |||
| 		if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) | ||||
| 			update_stats(&runtime_branches_stats[cpu], count[0]); | ||||
| 	} | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static int run_perf_stat(int argc __used, const char **argv) | ||||
|  | @ -449,12 +381,17 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
| 	update_stats(&walltime_nsecs_stats, t1 - t0); | ||||
| 
 | ||||
| 	if (no_aggr) { | ||||
| 		list_for_each_entry(counter, &evsel_list, node) | ||||
| 		list_for_each_entry(counter, &evsel_list, node) { | ||||
| 			read_counter(counter); | ||||
| 	} else { | ||||
| 		list_for_each_entry(counter, &evsel_list, node) | ||||
| 			read_counter_aggr(counter); | ||||
| 			perf_evsel__close_fd(counter, nr_cpus, 1); | ||||
| 		} | ||||
| 	} else { | ||||
| 		list_for_each_entry(counter, &evsel_list, node) { | ||||
| 			read_counter_aggr(counter); | ||||
| 			perf_evsel__close_fd(counter, nr_cpus, thread_num); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return WEXITSTATUS(status); | ||||
| } | ||||
| 
 | ||||
|  | @ -550,7 +487,7 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
| { | ||||
| 	struct perf_stat *ps = counter->priv; | ||||
| 	double avg = avg_stats(&ps->res_stats[0]); | ||||
| 	int scaled = ps->scaled; | ||||
| 	int scaled = counter->counts->scaled; | ||||
| 
 | ||||
| 	if (scaled == -1) { | ||||
| 		fprintf(stderr, "%*s%s%-24s\n", | ||||
|  | @ -590,14 +527,13 @@ static void print_counter_aggr(struct perf_evsel *counter) | |||
|  */ | ||||
| static void print_counter(struct perf_evsel *counter) | ||||
| { | ||||
| 	struct perf_stat *ps = counter->priv; | ||||
| 	u64 ena, run, val; | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	for (cpu = 0; cpu < nr_cpus; cpu++) { | ||||
| 		val = ps->cpu_counts[cpu].val; | ||||
| 		ena = ps->cpu_counts[cpu].ena; | ||||
| 		run = ps->cpu_counts[cpu].run; | ||||
| 		val = counter->counts->cpu[cpu].val; | ||||
| 		ena = counter->counts->cpu[cpu].ena; | ||||
| 		run = counter->counts->cpu[cpu].run; | ||||
| 		if (run == 0 || ena == 0) { | ||||
| 			fprintf(stderr, "CPU%*d%s%*s%s%-24s", | ||||
| 				csv_output ? 0 : -4, | ||||
|  | @ -818,7 +754,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
| 	} | ||||
| 
 | ||||
| 	list_for_each_entry(pos, &evsel_list, node) { | ||||
| 		if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 || | ||||
| 		if (perf_evsel__alloc_stat_priv(pos) < 0 || | ||||
| 		    perf_evsel__alloc_counts(pos, nr_cpus) < 0 || | ||||
| 		    perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0) | ||||
| 			goto out_free_fd; | ||||
| 	} | ||||
|  |  | |||
|  | @ -1,6 +1,8 @@ | |||
| #include "evsel.h" | ||||
| #include "util.h" | ||||
| 
 | ||||
| #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) | ||||
| 
 | ||||
| struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx) | ||||
| { | ||||
| 	struct perf_evsel *evsel = zalloc(sizeof(*evsel)); | ||||
|  | @ -21,15 +23,101 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | |||
| 	return evsel->fd != NULL ? 0 : -ENOMEM; | ||||
| } | ||||
| 
 | ||||
| int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) | ||||
| { | ||||
| 	evsel->counts = zalloc((sizeof(*evsel->counts) + | ||||
| 				(ncpus * sizeof(struct perf_counts_values)))); | ||||
| 	return evsel->counts != NULL ? 0 : -ENOMEM; | ||||
| } | ||||
| 
 | ||||
| void perf_evsel__free_fd(struct perf_evsel *evsel) | ||||
| { | ||||
| 	xyarray__delete(evsel->fd); | ||||
| 	evsel->fd = NULL; | ||||
| } | ||||
| 
 | ||||
| void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | ||||
| { | ||||
| 	int cpu, thread; | ||||
| 
 | ||||
| 	for (cpu = 0; cpu < ncpus; cpu++) | ||||
| 		for (thread = 0; thread < nthreads; ++thread) { | ||||
| 			close(FD(evsel, cpu, thread)); | ||||
| 			FD(evsel, cpu, thread) = -1; | ||||
| 		} | ||||
| } | ||||
| 
 | ||||
| void perf_evsel__delete(struct perf_evsel *evsel) | ||||
| { | ||||
| 	assert(list_empty(&evsel->node)); | ||||
| 	xyarray__delete(evsel->fd); | ||||
| 	free(evsel); | ||||
| } | ||||
| 
 | ||||
| int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | ||||
| 			      int cpu, int thread, bool scale) | ||||
| { | ||||
| 	struct perf_counts_values count; | ||||
| 	size_t nv = scale ? 3 : 1; | ||||
| 
 | ||||
| 	if (FD(evsel, cpu, thread) < 0) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) | ||||
| 		return -errno; | ||||
| 
 | ||||
| 	if (scale) { | ||||
| 		if (count.run == 0) | ||||
| 			count.val = 0; | ||||
| 		else if (count.run < count.ena) | ||||
| 			count.val = (u64)((double)count.val * count.ena / count.run + 0.5); | ||||
| 	} else | ||||
| 		count.ena = count.run = 0; | ||||
| 
 | ||||
| 	evsel->counts->cpu[cpu] = count; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| int __perf_evsel__read(struct perf_evsel *evsel, | ||||
| 		       int ncpus, int nthreads, bool scale) | ||||
| { | ||||
| 	size_t nv = scale ? 3 : 1; | ||||
| 	int cpu, thread; | ||||
| 	struct perf_counts_values *aggr = &evsel->counts->aggr, count; | ||||
| 
 | ||||
| 	aggr->val = 0; | ||||
| 
 | ||||
| 	for (cpu = 0; cpu < ncpus; cpu++) { | ||||
| 		for (thread = 0; thread < nthreads; thread++) { | ||||
| 			if (FD(evsel, cpu, thread) < 0) | ||||
| 				continue; | ||||
| 
 | ||||
| 			if (readn(FD(evsel, cpu, thread), | ||||
| 				  &count, nv * sizeof(u64)) < 0) | ||||
| 				return -errno; | ||||
| 
 | ||||
| 			aggr->val += count.val; | ||||
| 			if (scale) { | ||||
| 				aggr->ena += count.ena; | ||||
| 				aggr->run += count.run; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	evsel->counts->scaled = 0; | ||||
| 	if (scale) { | ||||
| 		if (aggr->run == 0) { | ||||
| 			evsel->counts->scaled = -1; | ||||
| 			aggr->val = 0; | ||||
| 			return 0; | ||||
| 		} | ||||
| 
 | ||||
| 		if (aggr->run < aggr->ena) { | ||||
| 			evsel->counts->scaled = 1; | ||||
| 			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); | ||||
| 		} | ||||
| 	} else | ||||
| 		aggr->ena = aggr->run = 0; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
|  |  | |||
|  | @ -2,15 +2,34 @@ | |||
| #define __PERF_EVSEL_H 1 | ||||
| 
 | ||||
| #include <linux/list.h> | ||||
| #include <stdbool.h> | ||||
| #include <linux/perf_event.h> | ||||
| #include "types.h" | ||||
| #include "xyarray.h" | ||||
|   | ||||
| struct perf_counts_values { | ||||
| 	union { | ||||
| 		struct { | ||||
| 			u64 val; | ||||
| 			u64 ena; | ||||
| 			u64 run; | ||||
| 		}; | ||||
| 		u64 values[3]; | ||||
| 	}; | ||||
| }; | ||||
| 
 | ||||
| struct perf_counts { | ||||
| 	s8		   	  scaled; | ||||
| 	struct perf_counts_values aggr; | ||||
| 	struct perf_counts_values cpu[]; | ||||
| }; | ||||
| 
 | ||||
| struct perf_evsel { | ||||
| 	struct list_head	node; | ||||
| 	struct perf_event_attr	attr; | ||||
| 	char			*filter; | ||||
| 	struct xyarray		*fd; | ||||
| 	struct perf_counts	*counts; | ||||
| 	int			idx; | ||||
| 	void			*priv; | ||||
| }; | ||||
|  | @ -19,10 +38,70 @@ struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx); | |||
| void perf_evsel__delete(struct perf_evsel *evsel); | ||||
| 
 | ||||
| int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); | ||||
| int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); | ||||
| void perf_evsel__free_fd(struct perf_evsel *evsel); | ||||
| void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); | ||||
| 
 | ||||
/*
 * True when @evsel's attr.type is PERF_TYPE_<t> and its attr.config is
 * PERF_COUNT_<c>.  @evsel is parenthesized so that any pointer
 * expression (e.g. "p + 1") expands correctly; note that it is still
 * evaluated twice, so it must not have side effects.
 */
#define perf_evsel__match(evsel, t, c)		\
	((evsel)->attr.type == PERF_TYPE_##t &&	\
	 (evsel)->attr.config == PERF_COUNT_##c)
| 
 | ||||
| int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | ||||
| 			      int cpu, int thread, bool scale); | ||||
| 
 | ||||
/**
 * perf_evsel__read_on_cpu - Read out the unscaled results on a CPU and thread
 *
 * @evsel - event selector to read value
 * @cpu - CPU of interest
 * @thread - thread of interest
 *
 * Calls __perf_evsel__read_on_cpu() with scaling disabled and returns
 * its result unchanged.
 */
static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
					  int cpu, int thread)
{
	const bool scale = false;

	return __perf_evsel__read_on_cpu(evsel, cpu, thread, scale);
}
| 
 | ||||
/**
 * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
 *
 * @evsel - event selector to read value
 * @cpu - CPU of interest
 * @thread - thread of interest
 *
 * Calls __perf_evsel__read_on_cpu() with scaling enabled and returns
 * its result unchanged.
 */
static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
						 int cpu, int thread)
{
	const bool scale = true;

	return __perf_evsel__read_on_cpu(evsel, cpu, thread, scale);
}
| 
 | ||||
| int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads, | ||||
| 		       bool scale); | ||||
| 
 | ||||
/**
 * perf_evsel__read - Read the unscaled aggregate results on all CPUs
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Calls __perf_evsel__read() with scaling disabled and returns its
 * result unchanged.
 */
static inline int perf_evsel__read(struct perf_evsel *evsel,
				    int ncpus, int nthreads)
{
	const bool scale = false;

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}
| 
 | ||||
/**
 * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
 *
 * @evsel - event selector to read value
 * @ncpus - Number of cpus affected, from zero
 * @nthreads - Number of threads affected, from zero
 *
 * Calls __perf_evsel__read() with scaling enabled and returns its
 * result unchanged.
 */
static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
					  int ncpus, int nthreads)
{
	const bool scale = true;

	return __perf_evsel__read(evsel, ncpus, nthreads, scale);
}
| 
 | ||||
| #endif /* __PERF_EVSEL_H */ | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Arnaldo Carvalho de Melo
				Arnaldo Carvalho de Melo