 ae01f84b93
			
		
	
	
	ae01f84b93
	
	
	
		
			
			Now that we dynamically allocate the paca array, it takes an extra load
whenever we want to access another cpu's paca. One place where we do that a
lot is when accessing per cpu variables. A simple example:
DEFINE_PER_CPU(unsigned long, vara);
unsigned long test4(int cpu)
{
	return per_cpu(vara, cpu);
}
This takes 4 loads, 5 if you include the actual load of the per cpu variable:
    ld r11,-32760(r30)  # load address of paca pointer
    ld r9,-32768(r30)   # load link address of percpu variable
    sldi r3,r29,9       # get offset into paca (each entry is 512 bytes)
    ld r0,0(r11)        # load paca pointer
    add r3,r0,r3        # paca + offset
    ld r11,64(r3)       # load paca[cpu].data_offset
    ldx r3,r9,r11       # load per cpu variable
If we remove the ppc64 specific per_cpu_offset(), we get the generic one
which indexes into a statically allocated array. This removes one load and
one add:
    ld r11,-32760(r30)  # load address of __per_cpu_offset
    ld r9,-32768(r30)   # load link address of percpu variable
    sldi r3,r29,3       # get offset into __per_cpu_offset (each entry 8 bytes)
    ldx r11,r11,r3      # load __per_cpu_offset[cpu]
    ldx r3,r9,r11       # load per cpu variable
Having all the offsets in one array also helps when iterating over a per cpu
variable across a number of cpus, such as in the scheduler. Before we would
need to load one paca cacheline when calculating each per cpu offset. Now we
have 16 (128 / sizeof(long)) per cpu offsets in each cacheline.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
		
	
			
		
			
				
	
	
		
			21 lines
		
	
	
	
		
			429 B
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			21 lines
		
	
	
	
		
			429 B
			
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef _ASM_POWERPC_PERCPU_H_
 | |
| #define _ASM_POWERPC_PERCPU_H_
 | |
| #ifdef __powerpc64__
 | |
| 
 | |
| /*
 | |
|  * Same as asm-generic/percpu.h, except that on 64-bit SMP builds the
 | |
|  * current cpu's per cpu offset (__my_cpu_offset) is read from the paca
 | |
|  * via local_paca->data_offset. Based on the x86-64 implementation.
 | |
|  *
 | |
|  * __my_cpu_offset is the only definition overridden in this header;
 | |
|  * everything else is provided by the asm-generic/percpu.h include below.
 | |
|  */
 | |
| 
 | |
| #ifdef CONFIG_SMP
 | |
| 
 | |
| #include <asm/paca.h>
 | |
| 
 | |
| #define __my_cpu_offset local_paca->data_offset
 | |
| 
 | |
| #endif /* CONFIG_SMP */
 | |
| #endif /* __powerpc64__ */
 | |
| 
 | |
| #include <asm-generic/percpu.h>
 | |
| 
 | |
| #endif /* _ASM_POWERPC_PERCPU_H_ */
 |