 b10db7f0d2
			
		
	
	
	b10db7f0d2
	
	
	
		
			
			I was confused by FSEC = 10^15 NSEC statement, plus small whitespace fixes. When there's copyright, there should be GPL. Signed-off-by: Pavel Machek <pavel@suse.cz> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
		
			
				
	
	
		
			426 lines
		
	
	
	
		
			9.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			426 lines
		
	
	
	
		
			9.8 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * kernel/time/timer_stats.c
 | |
|  *
 | |
|  * Collect timer usage statistics.
 | |
|  *
 | |
|  * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
 | |
|  * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 | |
|  *
 | |
|  * timer_stats is based on timer_top, a similar functionality which was part of
 | |
|  * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
 | |
|  * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
 | |
|  * on dynamic allocation of the statistics entries and linear search based
 | |
|  * lookup combined with a global lock, rather than the static array, hash
 | |
|  * and per-CPU locking which is used by timer_stats. It was written for the
 | |
|  * pre hrtimer kernel code and therefore did not take hrtimers into account.
 | |
|  * Nevertheless it provided the base for the timer_stats implementation and
 | |
|  * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
 | |
|  * for this effort.
 | |
|  *
 | |
|  * timer_top.c is
 | |
|  *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
 | |
|  *	Written by Daniel Petrini <d.pensator@gmail.com>
 | |
|  *	timer_top.c was released under the GNU General Public License version 2
 | |
|  *
 | |
|  * We export the addresses and counting of timer functions being called,
 | |
|  * the pid and cmdline from the owner process if applicable.
 | |
|  *
 | |
|  * Start/stop data collection:
 | |
|  * # echo [1|0] >/proc/timer_stats
 | |
|  *
 | |
|  * Display the information collected so far:
 | |
|  * # cat /proc/timer_stats
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License version 2 as
 | |
|  * published by the Free Software Foundation.
 | |
|  */
 | |
| 
 | |
| #include <linux/proc_fs.h>
 | |
| #include <linux/module.h>
 | |
| #include <linux/spinlock.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/seq_file.h>
 | |
| #include <linux/kallsyms.h>
 | |
| 
 | |
| #include <asm/uaccess.h>
 | |
| 
 | |
| /*
 | |
|  * This is our basic unit of interest: a timer expiry event identified
 | |
|  * by the timer, its start/expire functions and the PID of the task that
 | |
|  * started the timer. We count the number of times an event happens:
 | |
|  */
 | |
| struct entry {
 | |
| 	/*
 | |
| 	 * Hash list:
 | |
| 	 */
 | |
| 	struct entry		*next;
 | |
| 
 | |
| 	/*
 | |
| 	 * Hash keys:
 | |
| 	 */
 | |
| 	void			*timer;
 | |
| 	void			*start_func;
 | |
| 	void			*expire_func;
 | |
| 	pid_t			pid;
 | |
| 
 | |
| 	/*
 | |
| 	 * Number of timeout events:
 | |
| 	 */
 | |
| 	unsigned long		count;
 | |
| 	unsigned int		timer_flag;
 | |
| 
 | |
| 	/*
 | |
| 	 * We save the command-line string to preserve
 | |
| 	 * this information past task exit:
 | |
| 	 */
 | |
| 	char			comm[TASK_COMM_LEN + 1];
 | |
| 
 | |
| } ____cacheline_aligned_in_smp;
 | |
| 
 | |
| /*
 | |
|  * Spinlock protecting the tables - not taken during lookup:
 | |
|  */
 | |
| static DEFINE_SPINLOCK(table_lock);
 | |
| 
 | |
| /*
 | |
|  * Per-CPU lookup locks for fast hash lookup:
 | |
|  */
 | |
| static DEFINE_PER_CPU(spinlock_t, lookup_lock);
 | |
| 
 | |
| /*
 | |
|  * Mutex to serialize state changes with show-stats activities:
 | |
|  */
 | |
| static DEFINE_MUTEX(show_mutex);
 | |
| 
 | |
| /*
 | |
|  * Collection status, active/inactive:
 | |
|  */
 | |
| static int __read_mostly active;
 | |
| 
 | |
| /*
 | |
|  * Beginning/end timestamps of measurement:
 | |
|  */
 | |
| static ktime_t time_start, time_stop;
 | |
| 
 | |
| /*
 | |
|  * tstat entry structs only get allocated while collection is
 | |
|  * active and never freed during that time - this simplifies
 | |
|  * things quite a bit.
 | |
|  *
 | |
|  * They get freed when a new collection period is started.
 | |
|  */
 | |
| #define MAX_ENTRIES_BITS	10
 | |
| #define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)
 | |
| 
 | |
| static unsigned long nr_entries;
 | |
| static struct entry entries[MAX_ENTRIES];
 | |
| 
 | |
| static atomic_t overflow_count;
 | |
| 
 | |
| /*
 | |
|  * The entries are in a hash-table, for fast lookup:
 | |
|  */
 | |
| #define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
 | |
| #define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
 | |
| #define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
 | |
| 
 | |
| #define __tstat_hashfn(entry)						\
 | |
| 	(((unsigned long)(entry)->timer       ^				\
 | |
| 	  (unsigned long)(entry)->start_func  ^				\
 | |
| 	  (unsigned long)(entry)->expire_func ^				\
 | |
| 	  (unsigned long)(entry)->pid		) & TSTAT_HASH_MASK)
 | |
| 
 | |
| #define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))
 | |
| 
 | |
| static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
 | |
| 
 | |
| static void reset_entries(void)
 | |
| {
 | |
| 	nr_entries = 0;
 | |
| 	memset(entries, 0, sizeof(entries));
 | |
| 	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
 | |
| 	atomic_set(&overflow_count, 0);
 | |
| }
 | |
| 
 | |
| static struct entry *alloc_entry(void)
 | |
| {
 | |
| 	if (nr_entries >= MAX_ENTRIES)
 | |
| 		return NULL;
 | |
| 
 | |
| 	return entries + nr_entries++;
 | |
| }
 | |
| 
 | |
| static int match_entries(struct entry *entry1, struct entry *entry2)
 | |
| {
 | |
| 	return entry1->timer       == entry2->timer	  &&
 | |
| 	       entry1->start_func  == entry2->start_func  &&
 | |
| 	       entry1->expire_func == entry2->expire_func &&
 | |
| 	       entry1->pid	   == entry2->pid;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Look up whether an entry matching this item is present
 | |
|  * in the hash already. Must be called with irqs off and the
 | |
|  * lookup lock held:
 | |
|  */
 | |
| static struct entry *tstat_lookup(struct entry *entry, char *comm)
 | |
| {
 | |
| 	struct entry **head, *curr, *prev;
 | |
| 
 | |
| 	head = tstat_hashentry(entry);
 | |
| 	curr = *head;
 | |
| 
 | |
| 	/*
 | |
| 	 * The fastpath is when the entry is already hashed,
 | |
| 	 * we do this with the lookup lock held, but with the
 | |
| 	 * table lock not held:
 | |
| 	 */
 | |
| 	while (curr) {
 | |
| 		if (match_entries(curr, entry))
 | |
| 			return curr;
 | |
| 
 | |
| 		curr = curr->next;
 | |
| 	}
 | |
| 	/*
 | |
| 	 * Slowpath: allocate, set up and link a new hash entry:
 | |
| 	 */
 | |
| 	prev = NULL;
 | |
| 	curr = *head;
 | |
| 
 | |
| 	spin_lock(&table_lock);
 | |
| 	/*
 | |
| 	 * Make sure we have not raced with another CPU:
 | |
| 	 */
 | |
| 	while (curr) {
 | |
| 		if (match_entries(curr, entry))
 | |
| 			goto out_unlock;
 | |
| 
 | |
| 		prev = curr;
 | |
| 		curr = curr->next;
 | |
| 	}
 | |
| 
 | |
| 	curr = alloc_entry();
 | |
| 	if (curr) {
 | |
| 		*curr = *entry;
 | |
| 		curr->count = 0;
 | |
| 		curr->next = NULL;
 | |
| 		memcpy(curr->comm, comm, TASK_COMM_LEN);
 | |
| 
 | |
| 		smp_mb(); /* Ensure that curr is initialized before insert */
 | |
| 
 | |
| 		if (prev)
 | |
| 			prev->next = curr;
 | |
| 		else
 | |
| 			*head = curr;
 | |
| 	}
 | |
|  out_unlock:
 | |
| 	spin_unlock(&table_lock);
 | |
| 
 | |
| 	return curr;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * timer_stats_update_stats - Update the statistics for a timer.
 | |
|  * @timer:	pointer to either a timer_list or a hrtimer
 | |
|  * @pid:	the pid of the task which set up the timer
 | |
|  * @startf:	pointer to the function which did the timer setup
 | |
|  * @timerf:	pointer to the timer callback function of the timer
 | |
|  * @comm:	name of the process which set up the timer
 | |
|  *
 | |
|  * When the timer is already registered, then the event counter is
 | |
|  * incremented. Otherwise the timer is registered in a free slot.
 | |
|  */
 | |
| void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 | |
| 			      void *timerf, char *comm,
 | |
| 			      unsigned int timer_flag)
 | |
| {
 | |
| 	/*
 | |
| 	 * It doesnt matter which lock we take:
 | |
| 	 */
 | |
| 	spinlock_t *lock;
 | |
| 	struct entry *entry, input;
 | |
| 	unsigned long flags;
 | |
| 
 | |
| 	if (likely(!active))
 | |
| 		return;
 | |
| 
 | |
| 	lock = &per_cpu(lookup_lock, raw_smp_processor_id());
 | |
| 
 | |
| 	input.timer = timer;
 | |
| 	input.start_func = startf;
 | |
| 	input.expire_func = timerf;
 | |
| 	input.pid = pid;
 | |
| 	input.timer_flag = timer_flag;
 | |
| 
 | |
| 	spin_lock_irqsave(lock, flags);
 | |
| 	if (!active)
 | |
| 		goto out_unlock;
 | |
| 
 | |
| 	entry = tstat_lookup(&input, comm);
 | |
| 	if (likely(entry))
 | |
| 		entry->count++;
 | |
| 	else
 | |
| 		atomic_inc(&overflow_count);
 | |
| 
 | |
|  out_unlock:
 | |
| 	spin_unlock_irqrestore(lock, flags);
 | |
| }
 | |
| 
 | |
| static void print_name_offset(struct seq_file *m, unsigned long addr)
 | |
| {
 | |
| 	char symname[KSYM_NAME_LEN];
 | |
| 
 | |
| 	if (lookup_symbol_name(addr, symname) < 0)
 | |
| 		seq_printf(m, "<%p>", (void *)addr);
 | |
| 	else
 | |
| 		seq_printf(m, "%s", symname);
 | |
| }
 | |
| 
 | |
| static int tstats_show(struct seq_file *m, void *v)
 | |
| {
 | |
| 	struct timespec period;
 | |
| 	struct entry *entry;
 | |
| 	unsigned long ms;
 | |
| 	long events = 0;
 | |
| 	ktime_t time;
 | |
| 	int i;
 | |
| 
 | |
| 	mutex_lock(&show_mutex);
 | |
| 	/*
 | |
| 	 * If still active then calculate up to now:
 | |
| 	 */
 | |
| 	if (active)
 | |
| 		time_stop = ktime_get();
 | |
| 
 | |
| 	time = ktime_sub(time_stop, time_start);
 | |
| 
 | |
| 	period = ktime_to_timespec(time);
 | |
| 	ms = period.tv_nsec / 1000000;
 | |
| 
 | |
| 	seq_puts(m, "Timer Stats Version: v0.2\n");
 | |
| 	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
 | |
| 	if (atomic_read(&overflow_count))
 | |
| 		seq_printf(m, "Overflow: %d entries\n",
 | |
| 			atomic_read(&overflow_count));
 | |
| 
 | |
| 	for (i = 0; i < nr_entries; i++) {
 | |
| 		entry = entries + i;
 | |
|  		if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
 | |
| 			seq_printf(m, "%4luD, %5d %-16s ",
 | |
| 				entry->count, entry->pid, entry->comm);
 | |
| 		} else {
 | |
| 			seq_printf(m, " %4lu, %5d %-16s ",
 | |
| 				entry->count, entry->pid, entry->comm);
 | |
| 		}
 | |
| 
 | |
| 		print_name_offset(m, (unsigned long)entry->start_func);
 | |
| 		seq_puts(m, " (");
 | |
| 		print_name_offset(m, (unsigned long)entry->expire_func);
 | |
| 		seq_puts(m, ")\n");
 | |
| 
 | |
| 		events += entry->count;
 | |
| 	}
 | |
| 
 | |
| 	ms += period.tv_sec * 1000;
 | |
| 	if (!ms)
 | |
| 		ms = 1;
 | |
| 
 | |
| 	if (events && period.tv_sec)
 | |
| 		seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
 | |
| 			   events, events * 1000 / ms,
 | |
| 			   (events * 1000000 / ms) % 1000);
 | |
| 	else
 | |
| 		seq_printf(m, "%ld total events\n", events);
 | |
| 
 | |
| 	mutex_unlock(&show_mutex);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * After a state change, make sure all concurrent lookup/update
 | |
|  * activities have stopped:
 | |
|  */
 | |
| static void sync_access(void)
 | |
| {
 | |
| 	unsigned long flags;
 | |
| 	int cpu;
 | |
| 
 | |
| 	for_each_online_cpu(cpu) {
 | |
| 		spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
 | |
| 		/* nothing */
 | |
| 		spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static ssize_t tstats_write(struct file *file, const char __user *buf,
 | |
| 			    size_t count, loff_t *offs)
 | |
| {
 | |
| 	char ctl[2];
 | |
| 
 | |
| 	if (count != 2 || *offs)
 | |
| 		return -EINVAL;
 | |
| 
 | |
| 	if (copy_from_user(ctl, buf, count))
 | |
| 		return -EFAULT;
 | |
| 
 | |
| 	mutex_lock(&show_mutex);
 | |
| 	switch (ctl[0]) {
 | |
| 	case '0':
 | |
| 		if (active) {
 | |
| 			active = 0;
 | |
| 			time_stop = ktime_get();
 | |
| 			sync_access();
 | |
| 		}
 | |
| 		break;
 | |
| 	case '1':
 | |
| 		if (!active) {
 | |
| 			reset_entries();
 | |
| 			time_start = ktime_get();
 | |
| 			smp_mb();
 | |
| 			active = 1;
 | |
| 		}
 | |
| 		break;
 | |
| 	default:
 | |
| 		count = -EINVAL;
 | |
| 	}
 | |
| 	mutex_unlock(&show_mutex);
 | |
| 
 | |
| 	return count;
 | |
| }
 | |
| 
 | |
| static int tstats_open(struct inode *inode, struct file *filp)
 | |
| {
 | |
| 	return single_open(filp, tstats_show, NULL);
 | |
| }
 | |
| 
 | |
| static struct file_operations tstats_fops = {
 | |
| 	.open		= tstats_open,
 | |
| 	.read		= seq_read,
 | |
| 	.write		= tstats_write,
 | |
| 	.llseek		= seq_lseek,
 | |
| 	.release	= single_release,
 | |
| };
 | |
| 
 | |
| void __init init_timer_stats(void)
 | |
| {
 | |
| 	int cpu;
 | |
| 
 | |
| 	for_each_possible_cpu(cpu)
 | |
| 		spin_lock_init(&per_cpu(lookup_lock, cpu));
 | |
| }
 | |
| 
 | |
| static int __init init_tstats_procfs(void)
 | |
| {
 | |
| 	struct proc_dir_entry *pe;
 | |
| 
 | |
| 	pe = create_proc_entry("timer_stats", 0644, NULL);
 | |
| 	if (!pe)
 | |
| 		return -ENOMEM;
 | |
| 
 | |
| 	pe->proc_fops = &tstats_fops;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| __initcall(init_tstats_procfs);
 |