cpuops: Use cmpxchg for xchg to avoid lock semantics
Use cmpxchg instead of xchg to implement this_cpu_xchg.

xchg incurs LOCK overhead because a LOCK prefix is always implied for it, whereas cmpxchg without an explicit LOCK prefix does not.

Baselines:

xchg() = 18 cycles (no segment prefix, LOCK semantics)
__this_cpu_xchg = 1 cycle (simulated using this_cpu_read/write, two prefixes. Looks like the CPU can use loop optimization to get rid of most of the overhead)

Cycles before:

this_cpu_xchg = 37 cycles (segment prefix and LOCK (implied by xchg))

After:

this_cpu_xchg = 11 cycles (using cmpxchg without lock semantics)

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
This commit is contained in:
		
					parent
					
						
							
								7296e08aba
							
						
					
				
			
			
				commit
				
					
						8270137a0d
					
				
			
		
					 1 changed files with 15 additions and 6 deletions
				
			
		| 
						 | 
					@ -263,8 +263,9 @@ do {									\
 | 
				
			||||||
})
 | 
					})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * Beware: xchg on x86 has an implied lock prefix. There will be the cost of
 | 
					 * xchg is implemented using cmpxchg without a lock prefix. xchg is
 | 
				
			||||||
 * full lock semantics even though they are not needed.
 | 
					 * expensive due to the implied lock prefix.  The processor cannot prefetch
 | 
				
			||||||
 | 
					 * cachelines if xchg is used.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
#define percpu_xchg_op(var, nval)					\
 | 
					#define percpu_xchg_op(var, nval)					\
 | 
				
			||||||
({									\
 | 
					({									\
 | 
				
			||||||
| 
						 | 
					@ -272,25 +273,33 @@ do {									\
 | 
				
			||||||
	typeof(var) pxo_new__ = (nval);					\
 | 
						typeof(var) pxo_new__ = (nval);					\
 | 
				
			||||||
	switch (sizeof(var)) {						\
 | 
						switch (sizeof(var)) {						\
 | 
				
			||||||
	case 1:								\
 | 
						case 1:								\
 | 
				
			||||||
		asm("xchgb %2, "__percpu_arg(1)				\
 | 
							asm("\n1:mov "__percpu_arg(1)",%%al"			\
 | 
				
			||||||
 | 
							    "\n\tcmpxchgb %2, "__percpu_arg(1)			\
 | 
				
			||||||
 | 
							    "\n\tjnz 1b"					\
 | 
				
			||||||
			    : "=a" (pxo_ret__), "+m" (var)		\
 | 
								    : "=a" (pxo_ret__), "+m" (var)		\
 | 
				
			||||||
			    : "q" (pxo_new__)				\
 | 
								    : "q" (pxo_new__)				\
 | 
				
			||||||
			    : "memory");				\
 | 
								    : "memory");				\
 | 
				
			||||||
		break;							\
 | 
							break;							\
 | 
				
			||||||
	case 2:								\
 | 
						case 2:								\
 | 
				
			||||||
		asm("xchgw %2, "__percpu_arg(1)				\
 | 
							asm("\n1:mov "__percpu_arg(1)",%%ax"			\
 | 
				
			||||||
 | 
							    "\n\tcmpxchgw %2, "__percpu_arg(1)			\
 | 
				
			||||||
 | 
							    "\n\tjnz 1b"					\
 | 
				
			||||||
			    : "=a" (pxo_ret__), "+m" (var)		\
 | 
								    : "=a" (pxo_ret__), "+m" (var)		\
 | 
				
			||||||
			    : "r" (pxo_new__)				\
 | 
								    : "r" (pxo_new__)				\
 | 
				
			||||||
			    : "memory");				\
 | 
								    : "memory");				\
 | 
				
			||||||
		break;							\
 | 
							break;							\
 | 
				
			||||||
	case 4:								\
 | 
						case 4:								\
 | 
				
			||||||
		asm("xchgl %2, "__percpu_arg(1)				\
 | 
							asm("\n1:mov "__percpu_arg(1)",%%eax"			\
 | 
				
			||||||
 | 
							    "\n\tcmpxchgl %2, "__percpu_arg(1)			\
 | 
				
			||||||
 | 
							    "\n\tjnz 1b"					\
 | 
				
			||||||
			    : "=a" (pxo_ret__), "+m" (var)		\
 | 
								    : "=a" (pxo_ret__), "+m" (var)		\
 | 
				
			||||||
			    : "r" (pxo_new__)				\
 | 
								    : "r" (pxo_new__)				\
 | 
				
			||||||
			    : "memory");				\
 | 
								    : "memory");				\
 | 
				
			||||||
		break;							\
 | 
							break;							\
 | 
				
			||||||
	case 8:								\
 | 
						case 8:								\
 | 
				
			||||||
		asm("xchgq %2, "__percpu_arg(1)				\
 | 
							asm("\n1:mov "__percpu_arg(1)",%%rax"			\
 | 
				
			||||||
 | 
							    "\n\tcmpxchgq %2, "__percpu_arg(1)			\
 | 
				
			||||||
 | 
							    "\n\tjnz 1b"					\
 | 
				
			||||||
			    : "=a" (pxo_ret__), "+m" (var)		\
 | 
								    : "=a" (pxo_ret__), "+m" (var)		\
 | 
				
			||||||
			    : "r" (pxo_new__)				\
 | 
								    : "r" (pxo_new__)				\
 | 
				
			||||||
			    : "memory");				\
 | 
								    : "memory");				\
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue