f862eefec0
It turns out the kernel relies on barrier() to force a reload of the
percpu offset value.  Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a fake
stack read to hazard against barrier(), forcing gcc to know that it
must reread "tp" and recompute anything based on "tp" after a barrier.
This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.

A similar fix for ARMv7 was made in June in change 509eb76ebf.

Cc: stable@vger.kernel.org
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
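
For context, barrier() in the kernel is just an empty asm statement with a
"memory" clobber, so it constrains memory but says nothing about values
already held in registers; the sketch below (simplified, not the literal
kernel sources) shows why the fake stack read gives gcc something to order
against:

/* barrier(): an empty asm whose "memory" clobber tells gcc that any
 * memory may have been read or written, but which clobbers no
 * registers. */
#define barrier() __asm__ __volatile__("" : : : "memory")

/* Reading the global register variable directly (the !PREEMPT
 * definition in the file below) gives gcc nothing that barrier()
 * orders against, so an address computed from "tp" can be cached
 * across the barrier and a migration to another cpu goes unnoticed. */
#define __my_cpu_offset my_cpu_offset_reg

/* With the fix, "U" (*sp) is a fake memory input: since barrier()
 * clobbers memory, gcc cannot reuse a result of this asm from before
 * the barrier and must re-issue "move %0, tp", recomputing anything
 * derived from it. */
asm("move %0, tp" : "=r" (tp) : "U" (*sp));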
		
	
			
		
			
				
	
	
		
52 lines · 1.8 KiB · C
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#ifndef _ASM_TILE_PERCPU_H
#define _ASM_TILE_PERCPU_H

register unsigned long my_cpu_offset_reg asm("tp");

#ifdef CONFIG_PREEMPT
/*
 * For full preemption, we can't just use the register variable
 * directly, since we need barrier() to hazard against it, causing the
 * compiler to reload anything computed from a previous "tp" value.
 * But we also don't want to use volatile asm, since we'd like the
 * compiler to be able to cache the value across multiple percpu reads.
 * So we use a fake stack read as a hazard against barrier().
 * The 'U' constraint is like 'm' but disallows postincrement.
 */
static inline unsigned long __my_cpu_offset(void)
{
	unsigned long tp;
	register unsigned long *sp asm("sp");
	asm("move %0, tp" : "=r" (tp) : "U" (*sp));
	return tp;
}
#define __my_cpu_offset __my_cpu_offset()
#else
/*
 * We don't need to hazard against barrier() since "tp" doesn't ever
 * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
 * changes at function call points, at which we are already re-reading
 * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
 */
#define __my_cpu_offset my_cpu_offset_reg
#endif

#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))

#include <asm-generic/percpu.h>

#endif /* _ASM_TILE_PERCPU_H */
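
For completeness, <asm-generic/percpu.h> (included above) layers the generic
percpu accessors on top of this definition; roughly, and paraphrasing that
era's header rather than quoting it:

/* Paraphrased from asm-generic/percpu.h (without CONFIG_DEBUG_PREEMPT):
 * a per-cpu variable's address is its link-time address shifted by this
 * cpu's offset, i.e. by "tp" on tile, so each accessor re-evaluates
 * __my_cpu_offset rather than using a stale cached value. */
#define my_cpu_offset            __my_cpu_offset
#define __this_cpu_ptr(ptr)      SHIFT_PERCPU_PTR((ptr), __my_cpu_offset)
#define __get_cpu_var(var)       (*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))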