1. TRACE_EVENT(sched_process_exec) forgets to actually use the old pid argument, it sets ->old_pid = p->pid. 2. search_binary_handler() uses the wrong pid number. tracepoint needs the global pid_t from the root namespace, while old_pid is the virtual pid number as it seen by the tracer/parent. With this patch we have two pid_t's in search_binary_handler(), not really nice. Perhaps we should switch to "struct pid*", but in this case it would be better to cleanup the current code first and move the "depth == 0" code outside. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Cc: David Smith <dsmith@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Denys Vlasenko <dvlasenk@redhat.com> Link: http://lkml.kernel.org/r/20120330162636.GA4857@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
		
			
				
	
	
		
			432 lines
		
	
	
	
		
			9.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			432 lines
		
	
	
	
		
			9.7 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
#undef TRACE_SYSTEM
 | 
						|
#define TRACE_SYSTEM sched
 | 
						|
 | 
						|
#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
 | 
						|
#define _TRACE_SCHED_H
 | 
						|
 | 
						|
#include <linux/sched.h>
 | 
						|
#include <linux/tracepoint.h>
 | 
						|
#include <linux/binfmts.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_kthread_stop,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *t),
 | 
						|
 | 
						|
	TP_ARGS(t),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	pid			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid	= t->pid;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for the return value of the kthread stopping:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_kthread_stop_ret,
 | 
						|
 | 
						|
	TP_PROTO(int ret),
 | 
						|
 | 
						|
	TP_ARGS(ret),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__field(	int,	ret	)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		__entry->ret	= ret;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("ret=%d", __entry->ret)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for waking up a task:
 | 
						|
 */
 | 
						|
DECLARE_EVENT_CLASS(sched_wakeup_template,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *p, int success),
 | 
						|
 | 
						|
	TP_ARGS(p, success),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	pid			)
 | 
						|
		__field(	int,	prio			)
 | 
						|
		__field(	int,	success			)
 | 
						|
		__field(	int,	target_cpu		)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= p->pid;
 | 
						|
		__entry->prio		= p->prio;
 | 
						|
		__entry->success	= success;
 | 
						|
		__entry->target_cpu	= task_cpu(p);
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
 | 
						|
		  __entry->comm, __entry->pid, __entry->prio,
 | 
						|
		  __entry->success, __entry->target_cpu)
 | 
						|
);
 | 
						|
 | 
						|
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
 | 
						|
	     TP_PROTO(struct task_struct *p, int success),
 | 
						|
	     TP_ARGS(p, success));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for waking up a new task:
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
 | 
						|
	     TP_PROTO(struct task_struct *p, int success),
 | 
						|
	     TP_ARGS(p, success));
 | 
						|
 | 
						|
#ifdef CREATE_TRACE_POINTS
 | 
						|
static inline long __trace_sched_switch_state(struct task_struct *p)
 | 
						|
{
 | 
						|
	long state = p->state;
 | 
						|
 | 
						|
#ifdef CONFIG_PREEMPT
 | 
						|
	/*
 | 
						|
	 * For all intents and purposes a preempted task is a running task.
 | 
						|
	 */
 | 
						|
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
 | 
						|
		state = TASK_RUNNING | TASK_STATE_MAX;
 | 
						|
#endif
 | 
						|
 | 
						|
	return state;
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for task switches, performed by the scheduler:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_switch,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *prev,
 | 
						|
		 struct task_struct *next),
 | 
						|
 | 
						|
	TP_ARGS(prev, next),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	prev_comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	prev_pid			)
 | 
						|
		__field(	int,	prev_prio			)
 | 
						|
		__field(	long,	prev_state			)
 | 
						|
		__array(	char,	next_comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	next_pid			)
 | 
						|
		__field(	int,	next_prio			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
 | 
						|
		__entry->prev_pid	= prev->pid;
 | 
						|
		__entry->prev_prio	= prev->prio;
 | 
						|
		__entry->prev_state	= __trace_sched_switch_state(prev);
 | 
						|
		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
 | 
						|
		__entry->next_pid	= next->pid;
 | 
						|
		__entry->next_prio	= next->prio;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 | 
						|
		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
 | 
						|
		__entry->prev_state & (TASK_STATE_MAX-1) ?
 | 
						|
		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
 | 
						|
				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
 | 
						|
				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
 | 
						|
				{ 128, "W" }) : "R",
 | 
						|
		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 | 
						|
		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for a task being migrated:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_migrate_task,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *p, int dest_cpu),
 | 
						|
 | 
						|
	TP_ARGS(p, dest_cpu),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	pid			)
 | 
						|
		__field(	int,	prio			)
 | 
						|
		__field(	int,	orig_cpu		)
 | 
						|
		__field(	int,	dest_cpu		)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= p->pid;
 | 
						|
		__entry->prio		= p->prio;
 | 
						|
		__entry->orig_cpu	= task_cpu(p);
 | 
						|
		__entry->dest_cpu	= dest_cpu;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
 | 
						|
		  __entry->comm, __entry->pid, __entry->prio,
 | 
						|
		  __entry->orig_cpu, __entry->dest_cpu)
 | 
						|
);
 | 
						|
 | 
						|
DECLARE_EVENT_CLASS(sched_process_template,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *p),
 | 
						|
 | 
						|
	TP_ARGS(p),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	pid			)
 | 
						|
		__field(	int,	prio			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= p->pid;
 | 
						|
		__entry->prio		= p->prio;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d prio=%d",
 | 
						|
		  __entry->comm, __entry->pid, __entry->prio)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for freeing a task:
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_process_template, sched_process_free,
 | 
						|
	     TP_PROTO(struct task_struct *p),
 | 
						|
	     TP_ARGS(p));
 | 
						|
	     
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for a task exiting:
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_process_template, sched_process_exit,
 | 
						|
	     TP_PROTO(struct task_struct *p),
 | 
						|
	     TP_ARGS(p));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for waiting on task to unschedule:
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_process_template, sched_wait_task,
 | 
						|
	TP_PROTO(struct task_struct *p),
 | 
						|
	TP_ARGS(p));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for a waiting task:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_process_wait,
 | 
						|
 | 
						|
	TP_PROTO(struct pid *pid),
 | 
						|
 | 
						|
	TP_ARGS(pid),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	pid			)
 | 
						|
		__field(	int,	prio			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= pid_nr(pid);
 | 
						|
		__entry->prio		= current->prio;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d prio=%d",
 | 
						|
		  __entry->comm, __entry->pid, __entry->prio)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for do_fork:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_process_fork,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *parent, struct task_struct *child),
 | 
						|
 | 
						|
	TP_ARGS(parent, child),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array(	char,	parent_comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	parent_pid			)
 | 
						|
		__array(	char,	child_comm,	TASK_COMM_LEN	)
 | 
						|
		__field(	pid_t,	child_pid			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
 | 
						|
		__entry->parent_pid	= parent->pid;
 | 
						|
		memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
 | 
						|
		__entry->child_pid	= child->pid;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
 | 
						|
		__entry->parent_comm, __entry->parent_pid,
 | 
						|
		__entry->child_comm, __entry->child_pid)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for exec:
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_process_exec,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *p, pid_t old_pid,
 | 
						|
		 struct linux_binprm *bprm),
 | 
						|
 | 
						|
	TP_ARGS(p, old_pid, bprm),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__string(	filename,	bprm->filename	)
 | 
						|
		__field(	pid_t,		pid		)
 | 
						|
		__field(	pid_t,		old_pid		)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		__assign_str(filename, bprm->filename);
 | 
						|
		__entry->pid		= p->pid;
 | 
						|
		__entry->old_pid	= old_pid;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
 | 
						|
		  __entry->pid, __entry->old_pid)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
 | 
						|
 *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
 | 
						|
 */
 | 
						|
DECLARE_EVENT_CLASS(sched_stat_template,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *tsk, u64 delay),
 | 
						|
 | 
						|
	TP_ARGS(tsk, delay),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array( char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field( pid_t,	pid			)
 | 
						|
		__field( u64,	delay			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid	= tsk->pid;
 | 
						|
		__entry->delay	= delay;
 | 
						|
	)
 | 
						|
	TP_perf_assign(
 | 
						|
		__perf_count(delay);
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d delay=%Lu [ns]",
 | 
						|
			__entry->comm, __entry->pid,
 | 
						|
			(unsigned long long)__entry->delay)
 | 
						|
);
 | 
						|
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for accounting wait time (time the task is runnable
 | 
						|
 * but not actually running due to scheduler contention).
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_stat_template, sched_stat_wait,
 | 
						|
	     TP_PROTO(struct task_struct *tsk, u64 delay),
 | 
						|
	     TP_ARGS(tsk, delay));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for accounting sleep time (time the task is not runnable,
 | 
						|
 * including iowait, see below).
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
 | 
						|
	     TP_PROTO(struct task_struct *tsk, u64 delay),
 | 
						|
	     TP_ARGS(tsk, delay));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for accounting iowait time (time the task is not runnable
 | 
						|
 * due to waiting on IO to complete).
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
 | 
						|
	     TP_PROTO(struct task_struct *tsk, u64 delay),
 | 
						|
	     TP_ARGS(tsk, delay));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
 | 
						|
 */
 | 
						|
DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
 | 
						|
	     TP_PROTO(struct task_struct *tsk, u64 delay),
 | 
						|
	     TP_ARGS(tsk, delay));
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for accounting runtime (time the task is executing
 | 
						|
 * on a CPU).
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_stat_runtime,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
 | 
						|
 | 
						|
	TP_ARGS(tsk, runtime, vruntime),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array( char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field( pid_t,	pid			)
 | 
						|
		__field( u64,	runtime			)
 | 
						|
		__field( u64,	vruntime			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= tsk->pid;
 | 
						|
		__entry->runtime	= runtime;
 | 
						|
		__entry->vruntime	= vruntime;
 | 
						|
	)
 | 
						|
	TP_perf_assign(
 | 
						|
		__perf_count(runtime);
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
 | 
						|
			__entry->comm, __entry->pid,
 | 
						|
			(unsigned long long)__entry->runtime,
 | 
						|
			(unsigned long long)__entry->vruntime)
 | 
						|
);
 | 
						|
 | 
						|
/*
 | 
						|
 * Tracepoint for showing priority inheritance modifying a tasks
 | 
						|
 * priority.
 | 
						|
 */
 | 
						|
TRACE_EVENT(sched_pi_setprio,
 | 
						|
 | 
						|
	TP_PROTO(struct task_struct *tsk, int newprio),
 | 
						|
 | 
						|
	TP_ARGS(tsk, newprio),
 | 
						|
 | 
						|
	TP_STRUCT__entry(
 | 
						|
		__array( char,	comm,	TASK_COMM_LEN	)
 | 
						|
		__field( pid_t,	pid			)
 | 
						|
		__field( int,	oldprio			)
 | 
						|
		__field( int,	newprio			)
 | 
						|
	),
 | 
						|
 | 
						|
	TP_fast_assign(
 | 
						|
		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
 | 
						|
		__entry->pid		= tsk->pid;
 | 
						|
		__entry->oldprio	= tsk->prio;
 | 
						|
		__entry->newprio	= newprio;
 | 
						|
	),
 | 
						|
 | 
						|
	TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
 | 
						|
			__entry->comm, __entry->pid,
 | 
						|
			__entry->oldprio, __entry->newprio)
 | 
						|
);
 | 
						|
 | 
						|
#endif /* _TRACE_SCHED_H */
 | 
						|
 | 
						|
/* This part must be outside protection */
 | 
						|
#include <trace/define_trace.h>
 |