 ea7145477a
			
		
	
	
	ea7145477a
	
	
	
		
			
			Put x86 entry code into a separate link section: .entry.text.
Separating the entry text section seems to have performance
benefits - caused by more efficient instruction cache usage.
Running hackbench with perf stat --repeat showed that the change
compresses the icache footprint. The icache load miss rate went
down by about 15%:
 before patch:
         19417627  L1-icache-load-misses      ( +-   0.147% )
 after patch:
         16490788  L1-icache-load-misses      ( +-   0.180% )
The motivation of the patch was to fix a particular kprobes
bug that relates to the entry text section; the performance
advantage was discovered accidentally.
Whole perf output follows:
 - results for current tip tree:
  Performance counter stats for './hackbench/hackbench 10' (500 runs):
         19417627  L1-icache-load-misses      ( +-   0.147% )
       2676914223  instructions             #      0.497 IPC     ( +- 0.079% )
       5389516026  cycles                     ( +-   0.144% )
      0.206267711  seconds time elapsed   ( +-   0.138% )
 - results for current tip tree with the patch applied:
  Performance counter stats for './hackbench/hackbench 10' (500 runs):
         16490788  L1-icache-load-misses      ( +-   0.180% )
       2717734941  instructions             #      0.502 IPC     ( +- 0.079% )
       5414756975  cycles                     ( +-   0.148% )
      0.206747566  seconds time elapsed   ( +-   0.137% )
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: masami.hiramatsu.pt@hitachi.com
Cc: ananth@in.ibm.com
Cc: davem@davemloft.net
Cc: 2nddept-manager@sdl.hitachi.co.jp
LKML-Reference: <20110307181039.GB15197@jolsa.redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
		
	
			
		
			
				
	
	
		
			43 lines
		
	
	
	
		
			1.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
	
		
			1.2 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef _ASM_GENERIC_SECTIONS_H_
 | |
| #define _ASM_GENERIC_SECTIONS_H_
 | |
| 
 | |
| /* References to section boundaries */
 | |
| 
 | |
| extern char _text[], _stext[], _etext[];
 | |
| extern char _data[], _sdata[], _edata[];
 | |
| extern char __bss_start[], __bss_stop[];
 | |
| extern char __init_begin[], __init_end[];
 | |
| extern char _sinittext[], _einittext[];
 | |
| extern char _end[];
 | |
| extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
 | |
| extern char __kprobes_text_start[], __kprobes_text_end[];
 | |
| extern char __entry_text_start[], __entry_text_end[];
 | |
| extern char __initdata_begin[], __initdata_end[];
 | |
| extern char __start_rodata[], __end_rodata[];
 | |
| 
 | |
| /* Start and end of .ctors section - used for constructor calls. */
 | |
| extern char __ctors_start[], __ctors_end[];
 | |
| 
 | |
| /* Function descriptor handling (if any).  The default below returns
 | |
|  * the pointer unchanged; override it in asm/sections.h. */
 | |
| #ifndef dereference_function_descriptor
 | |
| #define dereference_function_descriptor(p) (p)
 | |
| #endif
 | |
| 
 | |
| /* random extra sections (if any).  Override
 | |
|  * in asm/sections.h */
 | |
| #ifndef arch_is_kernel_text
 | |
| /* Default used when arch_is_kernel_text is not already defined: addr is ignored and 0 is always returned. */
 | |
| static inline int arch_is_kernel_text(unsigned long addr)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #ifndef arch_is_kernel_data
 | |
| /* Default used when arch_is_kernel_data is not already defined: addr is ignored and 0 is always returned. */
 | |
| static inline int arch_is_kernel_data(unsigned long addr)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif /* _ASM_GENERIC_SECTIONS_H_ */
 |