percpu: add optimized generic percpu accessors
It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
parent
004aa322f8
commit
6dbde35308
15 changed files with 102 additions and 48 deletions
|
@ -40,16 +40,11 @@
|
|||
|
||||
#ifdef CONFIG_SMP
|
||||
#define __percpu_seg_str "%%"__stringify(__percpu_seg)":"
|
||||
#define __my_cpu_offset x86_read_percpu(this_cpu_off)
|
||||
#define __my_cpu_offset percpu_read(this_cpu_off)
|
||||
#else
|
||||
#define __percpu_seg_str
|
||||
#endif
|
||||
|
||||
#include <asm-generic/percpu.h>
|
||||
|
||||
/* We can use this directly for local CPU (faster). */
|
||||
DECLARE_PER_CPU(unsigned long, this_cpu_off);
|
||||
|
||||
/* For arch-specific code, we can use direct single-insn ops (they
|
||||
* don't give an lvalue though). */
|
||||
extern void __bad_percpu_size(void);
|
||||
|
@ -115,11 +110,13 @@ do { \
|
|||
ret__; \
|
||||
})
|
||||
|
||||
#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
|
||||
#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val)
|
||||
#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val)
|
||||
#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val)
|
||||
#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val)
|
||||
#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
|
||||
#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
|
||||
#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
|
||||
#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
|
||||
#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
|
||||
#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
|
||||
#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
|
||||
|
||||
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
|
||||
#define x86_test_and_clear_bit_percpu(bit, var) \
|
||||
|
@ -131,6 +128,11 @@ do { \
|
|||
old__; \
|
||||
})
|
||||
|
||||
#include <asm-generic/percpu.h>
|
||||
|
||||
/* We can use this directly for local CPU (faster). */
|
||||
DECLARE_PER_CPU(unsigned long, this_cpu_off);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
extern void load_pda_offset(int cpu);
|
||||
#else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue