The inline path seems to have changed the SLAB behavior for very large
kmalloc allocations with commit e3366016 ("slab: Use common
kmalloc_index/kmalloc_size functions"). This patch restores the old
behavior but also adds diagnostics so that we can figure out where in the
code these large allocations occur.
Reported-and-tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Christoph Lameter <cl@linux.com>
Link: http://lkml.kernel.org/r/201305040348.CIF81716.OStQOHFJMFLOVF@I-love.SAKURA.ne.jp
[ penberg@kernel.org: use WARN_ON_ONCE ]
Signed-off-by: Pekka Enberg <penberg@kernel.org>
		
	
			
		
			
				
	
	
		
			197 lines
		
	
	
	
		
			4.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			197 lines
		
	
	
	
		
			4.6 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
#ifndef _LINUX_SLAB_DEF_H
 | 
						|
#define	_LINUX_SLAB_DEF_H
 | 
						|
 | 
						|
/*
 | 
						|
 * Definitions unique to the original Linux SLAB allocator.
 | 
						|
 *
 | 
						|
 * What we provide here is a way to optimize the frequent kmalloc
 | 
						|
 * calls in the kernel by selecting the appropriate general cache
 | 
						|
 * if kmalloc was called with a size that can be established at
 | 
						|
 * compile time.
 | 
						|
 */
 | 
						|
 | 
						|
#include <linux/init.h>
 | 
						|
#include <linux/compiler.h>
 | 
						|
 | 
						|
/*
 | 
						|
 * struct kmem_cache
 | 
						|
 *
 | 
						|
 * manages a cache.
 | 
						|
 */
 | 
						|
 | 
						|
struct kmem_cache {
 | 
						|
/* 1) Cache tunables. Protected by cache_chain_mutex */
 | 
						|
	unsigned int batchcount;
 | 
						|
	unsigned int limit;
 | 
						|
	unsigned int shared;
 | 
						|
 | 
						|
	unsigned int size;
 | 
						|
	u32 reciprocal_buffer_size;
 | 
						|
/* 2) touched by every alloc & free from the backend */
 | 
						|
 | 
						|
	unsigned int flags;		/* constant flags */
 | 
						|
	unsigned int num;		/* # of objs per slab */
 | 
						|
 | 
						|
/* 3) cache_grow/shrink */
 | 
						|
	/* order of pgs per slab (2^n) */
 | 
						|
	unsigned int gfporder;
 | 
						|
 | 
						|
	/* force GFP flags, e.g. GFP_DMA */
 | 
						|
	gfp_t allocflags;
 | 
						|
 | 
						|
	size_t colour;			/* cache colouring range */
 | 
						|
	unsigned int colour_off;	/* colour offset */
 | 
						|
	struct kmem_cache *slabp_cache;
 | 
						|
	unsigned int slab_size;
 | 
						|
 | 
						|
	/* constructor func */
 | 
						|
	void (*ctor)(void *obj);
 | 
						|
 | 
						|
/* 4) cache creation/removal */
 | 
						|
	const char *name;
 | 
						|
	struct list_head list;
 | 
						|
	int refcount;
 | 
						|
	int object_size;
 | 
						|
	int align;
 | 
						|
 | 
						|
/* 5) statistics */
 | 
						|
#ifdef CONFIG_DEBUG_SLAB
 | 
						|
	unsigned long num_active;
 | 
						|
	unsigned long num_allocations;
 | 
						|
	unsigned long high_mark;
 | 
						|
	unsigned long grown;
 | 
						|
	unsigned long reaped;
 | 
						|
	unsigned long errors;
 | 
						|
	unsigned long max_freeable;
 | 
						|
	unsigned long node_allocs;
 | 
						|
	unsigned long node_frees;
 | 
						|
	unsigned long node_overflow;
 | 
						|
	atomic_t allochit;
 | 
						|
	atomic_t allocmiss;
 | 
						|
	atomic_t freehit;
 | 
						|
	atomic_t freemiss;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * If debugging is enabled, then the allocator can add additional
 | 
						|
	 * fields and/or padding to every object. size contains the total
 | 
						|
	 * object size including these internal fields, the following two
 | 
						|
	 * variables contain the offset to the user object and its size.
 | 
						|
	 */
 | 
						|
	int obj_offset;
 | 
						|
#endif /* CONFIG_DEBUG_SLAB */
 | 
						|
#ifdef CONFIG_MEMCG_KMEM
 | 
						|
	struct memcg_cache_params *memcg_params;
 | 
						|
#endif
 | 
						|
 | 
						|
/* 6) per-cpu/per-node data, touched during every alloc/free */
 | 
						|
	/*
 | 
						|
	 * We put array[] at the end of kmem_cache, because we want to size
 | 
						|
	 * this array to nr_cpu_ids slots instead of NR_CPUS
 | 
						|
	 * (see kmem_cache_init())
 | 
						|
	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
 | 
						|
	 * is statically defined, so we reserve the max number of cpus.
 | 
						|
	 *
 | 
						|
	 * We also need to guarantee that the list is able to accomodate a
 | 
						|
	 * pointer for each node since "nodelists" uses the remainder of
 | 
						|
	 * available pointers.
 | 
						|
	 */
 | 
						|
	struct kmem_cache_node **node;
 | 
						|
	struct array_cache *array[NR_CPUS + MAX_NUMNODES];
 | 
						|
	/*
 | 
						|
	 * Do not add fields after array[]
 | 
						|
	 */
 | 
						|
};
 | 
						|
 | 
						|
/* Out-of-line allocation entry points used by the inline helpers below. */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags);
void *__kmalloc(size_t size, gfp_t flags);
 | 
						|
 | 
						|
#ifdef CONFIG_TRACING
 | 
						|
extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
 | 
						|
#else
 | 
						|
static __always_inline void *
 | 
						|
kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 | 
						|
{
 | 
						|
	return kmem_cache_alloc(cachep, flags);
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
/*
 * kmalloc - allocate @size bytes using GFP flags @flags.
 *
 * If @size is not a compile-time constant the request is handed to
 * __kmalloc(). For a constant @size the general cache is picked here:
 *   - a size of 0 yields ZERO_SIZE_PTR;
 *   - a size above KMALLOC_MAX_SIZE warns once and yields NULL;
 *   - otherwise the cache at index kmalloc_index(size) is used, taken
 *     from kmalloc_dma_caches[] when CONFIG_ZONE_DMA is enabled and
 *     GFP_DMA is set in @flags, and from kmalloc_caches[] otherwise.
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *cache;
	int idx;

	if (!__builtin_constant_p(size))
		return __kmalloc(size, flags);

	if (size == 0)
		return ZERO_SIZE_PTR;

	if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
		return NULL;

	idx = kmalloc_index(size);

#ifdef CONFIG_ZONE_DMA
	cache = (flags & GFP_DMA) ? kmalloc_dma_caches[idx]
				  : kmalloc_caches[idx];
#else
	cache = kmalloc_caches[idx];
#endif

	return kmem_cache_alloc_trace(cache, flags, size);
}
 | 
						|
 | 
						|
#ifdef CONFIG_NUMA
 | 
						|
/* Out-of-line node-aware allocation entry points. */
extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
extern void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
				   int node);
 | 
						|
 | 
						|
#ifdef CONFIG_TRACING
 | 
						|
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 | 
						|
					 gfp_t flags,
 | 
						|
					 int nodeid,
 | 
						|
					 size_t size);
 | 
						|
#else
 | 
						|
static __always_inline void *
 | 
						|
kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 | 
						|
			    gfp_t flags,
 | 
						|
			    int nodeid,
 | 
						|
			    size_t size)
 | 
						|
{
 | 
						|
	return kmem_cache_alloc_node(cachep, flags, nodeid);
 | 
						|
}
 | 
						|
#endif
 | 
						|
 | 
						|
/*
 * kmalloc_node - allocate @size bytes using GFP flags @flags, passing
 * @node through to the node-aware allocator.
 *
 * If @size is not a compile-time constant the request is handed to
 * __kmalloc_node(). For a constant @size the general cache is picked here:
 *   - a size of 0 yields ZERO_SIZE_PTR;
 *   - a size above KMALLOC_MAX_SIZE warns once and yields NULL;
 *   - otherwise the cache at index kmalloc_index(size) is used, taken
 *     from kmalloc_dma_caches[] when CONFIG_ZONE_DMA is enabled and
 *     GFP_DMA is set in @flags, and from kmalloc_caches[] otherwise.
 */
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	struct kmem_cache *cache;
	int idx;

	if (!__builtin_constant_p(size))
		return __kmalloc_node(size, flags, node);

	if (size == 0)
		return ZERO_SIZE_PTR;

	if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
		return NULL;

	idx = kmalloc_index(size);

#ifdef CONFIG_ZONE_DMA
	cache = (flags & GFP_DMA) ? kmalloc_dma_caches[idx]
				  : kmalloc_caches[idx];
#else
	cache = kmalloc_caches[idx];
#endif

	return kmem_cache_alloc_node_trace(cache, flags, node, size);
}
 | 
						|
 | 
						|
#endif	/* CONFIG_NUMA */
 | 
						|
 | 
						|
#endif	/* _LINUX_SLAB_DEF_H */
 |