| 
									
										
										
										
											2009-07-13 10:33:21 +08:00
										 |  |  | #undef TRACE_SYSTEM
 | 
					
						
							|  |  |  | #define TRACE_SYSTEM irq
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | #if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
 | 
					
						
							| 
									
										
										
										
											2009-02-26 10:11:05 -05:00
										 |  |  | #define _TRACE_IRQ_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <linux/tracepoint.h>
 | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | struct irqaction; | 
					
						
							|  |  |  | struct softirq_action; | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-06-01 12:20:40 -04:00
										 |  |  | #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
 | 
					
						
							| 
									
										
										
										
											2009-09-17 17:38:32 +08:00
										 |  |  | #define show_softirq_name(val)				\
 | 
					
						
							|  |  |  | 	__print_symbolic(val,				\ | 
					
						
							|  |  |  | 			 softirq_name(HI),		\ | 
					
						
							|  |  |  | 			 softirq_name(TIMER),		\ | 
					
						
							|  |  |  | 			 softirq_name(NET_TX),		\ | 
					
						
							|  |  |  | 			 softirq_name(NET_RX),		\ | 
					
						
							|  |  |  | 			 softirq_name(BLOCK),		\ | 
					
						
							|  |  |  | 			 softirq_name(BLOCK_IOPOLL),	\ | 
					
						
							|  |  |  | 			 softirq_name(TASKLET),		\ | 
					
						
							|  |  |  | 			 softirq_name(SCHED),		\ | 
					
						
							| 
									
										
											  
											
												rcu: Use softirq to address performance regression
Commit a26ac2455ffcf3(rcu: move TREE_RCU from softirq to kthread)
introduced performance regression. In an AIM7 test, this commit degraded
performance by about 40%.
The commit runs rcu callbacks in a kthread instead of softirq. We observed
high rate of context switch which is caused by this. Out test system has
64 CPUs and HZ is 1000, so we saw more than 64k context switch per second
which is caused by RCU's per-CPU kthread.  A trace showed that most of
the time the RCU per-CPU kthread doesn't actually handle any callbacks,
but instead just does a very small amount of work handling grace periods.
This means that RCU's per-CPU kthreads are making the scheduler do quite
a bit of work in order to allow a very small amount of RCU-related
processing to be done.
Alex Shi's analysis determined that this slowdown is due to lock
contention within the scheduler.  Unfortunately, as Peter Zijlstra points
out, the scheduler's real-time semantics require global action, which
means that this contention is inherent in real-time scheduling.  (Yes,
perhaps someone will come up with a workaround -- otherwise, -rt is not
going to do well on large SMP systems -- but this patch will work around
this issue in the meantime.  And "the meantime" might well be forever.)
This patch therefore re-introduces softirq processing to RCU, but only
for core RCU work.  RCU callbacks are still executed in kthread context,
so that only a small amount of RCU work runs in softirq context in the
common case.  This should minimize ksoftirqd execution, allowing us to
skip boosting of ksoftirqd for CONFIG_RCU_BOOST=y kernels.
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Tested-by: "Alex,Shi" <alex.shi@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
											
										 
											2011-06-14 13:26:25 +08:00
										 |  |  | 			 softirq_name(HRTIMER),		\ | 
					
						
							|  |  |  | 			 softirq_name(RCU)) | 
					
						
							| 
									
										
										
										
											2009-05-20 19:56:19 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-30 13:29:47 -04:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * irq_handler_entry - called immediately before the irq action handler | 
					
						
							|  |  |  |  * @irq: irq number | 
					
						
							|  |  |  |  * @action: pointer to struct irqaction | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The struct irqaction pointed to by @action contains various | 
					
						
							|  |  |  |  * information about the handler, including the device name, | 
					
						
							|  |  |  |  * @action->name, and the device id, @action->dev_id. When used in | 
					
						
							|  |  |  |  * conjunction with the irq_handler_exit tracepoint, we can figure | 
					
						
							|  |  |  |  * out irq handler latencies. | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | TRACE_EVENT(irq_handler_entry, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | 	TP_PROTO(int irq, struct irqaction *action), | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | 	TP_ARGS(irq, action), | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	TP_STRUCT__entry( | 
					
						
							|  |  |  | 		__field(	int,	irq		) | 
					
						
							|  |  |  | 		__string(	name,	action->name	) | 
					
						
							|  |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_fast_assign( | 
					
						
							|  |  |  | 		__entry->irq = irq; | 
					
						
							|  |  |  | 		__assign_str(name, action->name); | 
					
						
							|  |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												events: Harmonize event field names and print output names
Now that we can filter based on fields via perf record, people
will start using filter expressions and will expect them to
be obvious.
The primary way to see which fields are available is by looking
at the trace output, such as:
  gcc-18676 [000]   343.011728: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.012727: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.032692: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.033690: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.034687: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.035686: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.036684: irq_handler_entry: irq=0 handler=timer
While 'irq==0' filters work, the 'handler==<x>' filter expression
does not work:
  $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1
   Error: failed to set filter with 22 (Invalid argument)
The problem is that while an 'irq' field exists and is recognized
as a filter field - 'handler' does not exist - its name is 'name'
in the output.
To solve this, we need to synchronize the printout and the field
names, wherever possible.
In cases where the printout prints a non-field, we enclose
that information in square brackets, such as:
  perf-1380  [013]   724.903505: softirq_exit: vec=9 [action=RCU]
  perf-1380  [013]   724.904482: softirq_exit: vec=1 [action=TIMER]
This way users can use filter expressions more intuitively: all
fields that show up as 'primary' (non-bracketed) information is
filterable.
This patch harmonizes the field names for all irq, bkl, power,
sched and timer events.
We might in fact think about dropping the print format bit of
generic tracepoints altogether, and just print the fields that
are being recorded.
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
											
										 
											2009-10-15 11:50:39 +02:00
										 |  |  | 	TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | ); | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-30 13:29:47 -04:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * irq_handler_exit - called immediately after the irq action handler returns | 
					
						
							|  |  |  |  * @irq: irq number | 
					
						
							|  |  |  |  * @action: pointer to struct irqaction | 
					
						
							|  |  |  |  * @ret: return value | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding | 
					
						
							|  |  |  |  * @action->handler scuccessully handled this irq. Otherwise, the irq might be | 
					
						
							|  |  |  |  * a shared irq line, or the irq was not handled successfully. Can be used in | 
					
						
							|  |  |  |  * conjunction with the irq_handler_entry to understand irq handler latencies. | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  |  */ | 
					
						
							|  |  |  | TRACE_EVENT(irq_handler_exit, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_PROTO(int irq, struct irqaction *action, int ret), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_ARGS(irq, action, ret), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_STRUCT__entry( | 
					
						
							|  |  |  | 		__field(	int,	irq	) | 
					
						
							|  |  |  | 		__field(	int,	ret	) | 
					
						
							|  |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_fast_assign( | 
					
						
							|  |  |  | 		__entry->irq	= irq; | 
					
						
							|  |  |  | 		__entry->ret	= ret; | 
					
						
							|  |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												events: Harmonize event field names and print output names
Now that we can filter based on fields via perf record, people
will start using filter expressions and will expect them to
be obvious.
The primary way to see which fields are available is by looking
at the trace output, such as:
  gcc-18676 [000]   343.011728: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.012727: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.032692: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.033690: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.034687: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.035686: irq_handler_entry: irq=0 handler=timer
  cc1-18677 [000]   343.036684: irq_handler_entry: irq=0 handler=timer
While 'irq==0' filters work, the 'handler==<x>' filter expression
does not work:
  $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1
   Error: failed to set filter with 22 (Invalid argument)
The problem is that while an 'irq' field exists and is recognized
as a filter field - 'handler' does not exist - its name is 'name'
in the output.
To solve this, we need to synchronize the printout and the field
names, wherever possible.
In cases where the printout prints a non-field, we enclose
that information in square brackets, such as:
  perf-1380  [013]   724.903505: softirq_exit: vec=9 [action=RCU]
  perf-1380  [013]   724.904482: softirq_exit: vec=1 [action=TIMER]
This way users can use filter expressions more intuitively: all
fields that show up as 'primary' (non-bracketed) information is
filterable.
This patch harmonizes the field names for all irq, bkl, power,
sched and timer events.
We might in fact think about dropping the print format bit of
generic tracepoints altogether, and just print the fields that
are being recorded.
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
											
										 
											2009-10-15 11:50:39 +02:00
										 |  |  | 	TP_printk("irq=%d ret=%s", | 
					
						
							| 
									
										
										
										
											2009-04-10 08:54:16 -04:00
										 |  |  | 		  __entry->irq, __entry->ret ? "handled" : "unhandled") | 
					
						
							|  |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  | DECLARE_EVENT_CLASS(softirq, | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_PROTO(unsigned int vec_nr), | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_ARGS(vec_nr), | 
					
						
							| 
									
										
										
										
											2009-02-26 10:11:05 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 	TP_STRUCT__entry( | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 		__field(	unsigned int,	vec	) | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	TP_fast_assign( | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 		__entry->vec = vec_nr; | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 	), | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_printk("vec=%u [action=%s]", __entry->vec, | 
					
						
							| 
									
										
										
										
											2009-05-20 19:56:19 -04:00
										 |  |  | 		  show_softirq_name(__entry->vec)) | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * softirq_entry - called immediately before the softirq handler | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * @vec_nr:  softirq vector number | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * When used in combination with the softirq_exit tracepoint | 
					
						
							|  |  |  |  * we can determine the softirq handler runtine. | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  |  */ | 
					
						
							|  |  |  | DEFINE_EVENT(softirq, softirq_entry, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_PROTO(unsigned int vec_nr), | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_ARGS(vec_nr) | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-30 13:29:47 -04:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * softirq_exit - called immediately after the softirq handler returns | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * @vec_nr:  softirq vector number | 
					
						
							| 
									
										
										
										
											2009-04-30 13:29:47 -04:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * When used in combination with the softirq_entry tracepoint | 
					
						
							|  |  |  |  * we can determine the softirq handler runtine. | 
					
						
							| 
									
										
										
										
											2009-04-30 13:29:47 -04:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2009-11-26 15:04:31 +08:00
										 |  |  | DEFINE_EVENT(softirq, softirq_exit, | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_PROTO(unsigned int vec_nr), | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_ARGS(vec_nr) | 
					
						
							| 
									
										
										
										
											2009-04-24 11:26:55 -04:00
										 |  |  | ); | 
					
						
							| 
									
										
										
										
											2009-02-26 10:11:05 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  | /**
 | 
					
						
							|  |  |  |  * softirq_raise - called immediately when a softirq is raised | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * @vec_nr:  softirq vector number | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  |  * When used in combination with the softirq_entry tracepoint | 
					
						
							|  |  |  |  * we can determine the softirq raise to run latency. | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  |  */ | 
					
						
							|  |  |  | DEFINE_EVENT(softirq, softirq_raise, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_PROTO(unsigned int vec_nr), | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-10-19 15:00:13 +02:00
										 |  |  | 	TP_ARGS(vec_nr) | 
					
						
							| 
									
										
										
										
											2010-08-23 18:42:48 +09:00
										 |  |  | ); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-04-10 09:36:00 -04:00
										 |  |  | #endif /*  _TRACE_IRQ_H */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* This part must be outside protection */ | 
					
						
							|  |  |  | #include <trace/define_trace.h>
 |