Add marker_synchronize_unregister() before module unloading. This prevents possible trace calls into unloaded module text. Signed-off-by: Wu Fengguang <wfg@linux.intel.com> Signed-off-by: Avi Kivity <avi@redhat.com>
		
			
				
	
	
		
			285 lines
		
	
	
	
		
			6.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			285 lines
		
	
	
	
		
			6.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * kvm trace
 *
 * It is designed to allow debugging traces of kvm to be generated
 * on UP / SMP machines.  Each trace entry can be timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 *
 * The implementation refers to blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date:    Feb 2008
 */
#include <linux/module.h>
 | 
						|
#include <linux/relay.h>
 | 
						|
#include <linux/debugfs.h>
 | 
						|
#include <linux/ktime.h>
 | 
						|
 | 
						|
#include <linux/kvm_host.h>
 | 
						|
 | 
						|
/* Trace life-cycle states, stored in kvm_trace::trace_state. */
#define KVM_TRACE_STATE_RUNNING 	(1 << 0)	/* probes active, records flowing */
#define KVM_TRACE_STATE_PAUSE 		(1 << 1)	/* channel open, recording suspended */
#define KVM_TRACE_STATE_CLEARUP 	(1 << 2)	/* teardown in progress */

/* One global tracing context: a relay channel plus bookkeeping. */
struct kvm_trace {
	int trace_state;		/* KVM_TRACE_STATE_* above */
	struct rchan *rchan;		/* relay channel the records are written to */
	struct dentry *lost_file;	/* debugfs "lost_records" counter file */
	atomic_t lost_records;		/* records dropped on full subbuffers */
};
/* Singleton context; NULL until tracing is enabled via ioctl. */
static struct kvm_trace *kvm_trace;

/* Describes one marker hooked for tracing; passed as probe_private. */
struct kvm_trace_probe {
	const char *name;		/* marker name to register against */
	const char *format;		/* expected marker format string */
	u32 timestamp_in;		/* non-zero: records carry a timestamp */
	marker_probe_func *probe_func;	/* handler invoked when marker fires */
};
						|
static inline int calc_rec_size(int timestamp, int extra)
 | 
						|
{
 | 
						|
	int rec_size = KVM_TRC_HEAD_SIZE;
 | 
						|
 | 
						|
	rec_size += extra;
 | 
						|
	return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
 | 
						|
}
 | 
						|
 | 
						|
/*
 * Marker probe handler shared by all kvm trace markers: decode the
 * marker's arguments into a fixed-layout kvm_trace_rec and append it
 * to the relay channel.
 *
 * The va_arg() sequence below MUST match the marker format string:
 * event id (u32), vcpu pointer, extra-word count (u32), then the
 * extra u32 payload words.  Do not reorder these reads.
 */
static void kvm_add_trace(void *probe_private, void *call_data,
			  const char *format, va_list *args)
{
	struct kvm_trace_probe *p = probe_private;
	struct kvm_trace *kt = kvm_trace;
	struct kvm_trace_rec rec;
	struct kvm_vcpu *vcpu;
	int    i, size;
	u32    extra;

	/* Drop the record unless tracing is actively running. */
	if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
		return;

	rec.rec_val	= TRACE_REC_EVENT_ID(va_arg(*args, u32));
	vcpu		= va_arg(*args, struct kvm_vcpu *);
	rec.pid		= current->tgid;
	rec.vcpu_id	= vcpu->vcpu_id;

	/* Clamp the caller-supplied payload count; warn on overshoot. */
	extra   	= va_arg(*args, u32);
	WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
	extra 		= min_t(u32, extra, KVM_TRC_EXTRA_MAX);

	rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
			| TRACE_REC_NUM_DATA_ARGS(extra);

	if (p->timestamp_in) {
		rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());

		for (i = 0; i < extra; i++)
			rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
	} else {
		for (i = 0; i < extra; i++)
			rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
	}

	/* Write only the used portion of the record, not sizeof(rec). */
	size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
	relay_write(kt->rchan, &rec, size);
}
						|
static struct kvm_trace_probe kvm_trace_probes[] = {
 | 
						|
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
 | 
						|
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
 | 
						|
};
 | 
						|
 | 
						|
static int lost_records_get(void *data, u64 *val)
 | 
						|
{
 | 
						|
	struct kvm_trace *kt = data;
 | 
						|
 | 
						|
	*val = atomic_read(&kt->lost_records);
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
 | 
						|
 | 
						|
/*
 *  The relay channel is used in "no-overwrite" mode: it keeps track of
 *  how many times we encountered a full subbuffer, so that the
 *  user-space app can be told how many records were lost.
 */
/*
 * relay subbuf_start callback.  Returning 1 lets relay switch to the
 * next subbuffer; returning 0 drops the record (no-overwrite mode)
 * and counts it as lost.
 */
static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
				     void *prev_subbuf, size_t prev_padding)
{
	struct kvm_trace *kt;

	if (relay_buf_full(buf)) {
		kt = buf->chan->private_data;
		atomic_inc(&kt->lost_records);
		return 0;
	}

	if (!prev_subbuf) {
		/*
		 * Executed only once when the channel is opened:
		 * save metadata (a magic number) as the first record.
		 */
		subbuf_start_reserve(buf, sizeof(u32));
		*(u32 *)subbuf = 0x12345678;
	}

	return 1;
}
						|
static struct dentry *kvm_create_buf_file_callack(const char *filename,
 | 
						|
						 struct dentry *parent,
 | 
						|
						 int mode,
 | 
						|
						 struct rchan_buf *buf,
 | 
						|
						 int *is_global)
 | 
						|
{
 | 
						|
	return debugfs_create_file(filename, mode, parent, buf,
 | 
						|
				   &relay_file_operations);
 | 
						|
}
 | 
						|
 | 
						|
/* relay callback: remove a buffer's debugfs file on channel close. */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}
						|
/* Wires the relay channel to the callbacks defined above. */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start 		= kvm_subbuf_start_callback,
	.create_buf_file 	= kvm_create_buf_file_callack,
	.remove_buf_file 	= kvm_remove_buf_file_callback,
};
						|
/*
 * Allocate the trace context, create the debugfs artifacts, open the
 * relay channel, and register the marker probes.  On success the
 * global kvm_trace is published and tracing is marked RUNNING.
 *
 * Returns 0 on success, -EINVAL on bad user parameters, -ENOMEM on
 * allocation failure, -EIO when debugfs/relay setup fails.
 *
 * NOTE(review): nothing here checks whether kvm_trace is already
 * non-NULL — a second KVM_TRACE_ENABLE would overwrite and leak the
 * previous context; verify callers serialize enable/disable.
 */
static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
{
	struct kvm_trace *kt;
	int i, r = -ENOMEM;

	/* Reject a zero-sized or zero-count buffer configuration. */
	if (!kuts->buf_size || !kuts->buf_nr)
		return -EINVAL;

	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
	if (!kt)
		goto err;

	r = -EIO;
	atomic_set(&kt->lost_records, 0);
	kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
					    kt, &kvm_trace_lost_ops);
	if (!kt->lost_file)
		goto err;

	kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
				kuts->buf_nr, &kvm_relay_callbacks, kt);
	if (!kt->rchan)
		goto err;

	/* Publish before registering probes; kvm_add_trace reads it. */
	kvm_trace = kt;

	for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
		struct kvm_trace_probe *p = &kvm_trace_probes[i];

		/* A probe that fails to register is logged, not fatal. */
		r = marker_probe_register(p->name, p->format, p->probe_func, p);
		if (r)
			printk(KERN_INFO "Unable to register probe %s\n",
			       p->name);
	}

	kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;

	return 0;
err:
	/* Unwind partial setup; kt is NULL only if kzalloc failed. */
	if (kt) {
		if (kt->lost_file)
			debugfs_remove(kt->lost_file);
		if (kt->rchan)
			relay_close(kt->rchan);
		kfree(kt);
	}
	return r;
}
						|
/*
 * KVM_TRACE_ENABLE ioctl helper: copy the user's setup block in and
 * start tracing.  Returns -EFAULT on a bad user pointer, otherwise
 * whatever do_kvm_trace_enable() returns.
 */
static int kvm_trace_enable(char __user *arg)
{
	struct kvm_user_trace_setup kuts;

	if (copy_from_user(&kuts, arg, sizeof(kuts)))
		return -EFAULT;

	return do_kvm_trace_enable(&kuts);
}
						|
static int kvm_trace_pause(void)
 | 
						|
{
 | 
						|
	struct kvm_trace *kt = kvm_trace;
 | 
						|
	int r = -EINVAL;
 | 
						|
 | 
						|
	if (kt == NULL)
 | 
						|
		return r;
 | 
						|
 | 
						|
	if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
 | 
						|
		kt->trace_state = KVM_TRACE_STATE_PAUSE;
 | 
						|
		relay_flush(kt->rchan);
 | 
						|
		r = 0;
 | 
						|
	}
 | 
						|
 | 
						|
	return r;
 | 
						|
}
 | 
						|
 | 
						|
void kvm_trace_cleanup(void)
 | 
						|
{
 | 
						|
	struct kvm_trace *kt = kvm_trace;
 | 
						|
	int i;
 | 
						|
 | 
						|
	if (kt == NULL)
 | 
						|
		return;
 | 
						|
 | 
						|
	if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
 | 
						|
	    kt->trace_state == KVM_TRACE_STATE_PAUSE) {
 | 
						|
 | 
						|
		kt->trace_state = KVM_TRACE_STATE_CLEARUP;
 | 
						|
 | 
						|
		for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
 | 
						|
			struct kvm_trace_probe *p = &kvm_trace_probes[i];
 | 
						|
			marker_probe_unregister(p->name, p->probe_func, p);
 | 
						|
		}
 | 
						|
		marker_synchronize_unregister();
 | 
						|
 | 
						|
		relay_close(kt->rchan);
 | 
						|
		debugfs_remove(kt->lost_file);
 | 
						|
		kfree(kt);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
 | 
						|
{
 | 
						|
	void __user *argp = (void __user *)arg;
 | 
						|
	long r = -EINVAL;
 | 
						|
 | 
						|
	if (!capable(CAP_SYS_ADMIN))
 | 
						|
		return -EPERM;
 | 
						|
 | 
						|
	switch (ioctl) {
 | 
						|
	case KVM_TRACE_ENABLE:
 | 
						|
		r = kvm_trace_enable(argp);
 | 
						|
		break;
 | 
						|
	case KVM_TRACE_PAUSE:
 | 
						|
		r = kvm_trace_pause();
 | 
						|
		break;
 | 
						|
	case KVM_TRACE_DISABLE:
 | 
						|
		r = 0;
 | 
						|
		kvm_trace_cleanup();
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	return r;
 | 
						|
}
 |