While I was experimenting with a feature of my own (e.g., something in the reclaim path), the kernel would easily oops. I guessed that the cause was a stack overflow and wanted to prove it. I discovered the stack tracer, which proved very useful, but the kernel would oops before my user program could gather the information via "watch cat /sys/kernel/debug/tracing/stack_trace", so I couldn't get any output from it. What I needed was for the stack tracer to emit the kernel stack usage before the oops so I could find what was hogging the stack. This patch prints the call stack of the maximum stack usage right before an oops so we can find the culprit. The result is as follows. [ 1116.522206] init: lightdm main process (1246) terminated with status 1 [ 1119.922916] init: failsafe-x main process (1272) terminated with status 1 [ 3887.728131] kworker/u24:1 (6637) used greatest stack depth: 256 bytes left [ 6397.629227] cc1 (9554) used greatest stack depth: 128 bytes left [ 7174.467392] Depth Size Location (47 entries) [ 7174.467392] ----- ---- -------- [ 7174.467785] 0) 7248 256 get_page_from_freelist+0xa7/0x920 [ 7174.468506] 1) 6992 352 __alloc_pages_nodemask+0x1cd/0xb20 [ 7174.469224] 2) 6640 8 alloc_pages_current+0x10f/0x1f0 [ 7174.469413] 3) 6632 168 new_slab+0x2c5/0x370 [ 7174.469413] 4) 6464 8 __slab_alloc+0x3a9/0x501 [ 7174.469413] 5) 6456 80 __kmalloc+0x1cb/0x200 [ 7174.469413] 6) 6376 376 vring_add_indirect+0x36/0x200 [ 7174.469413] 7) 6000 144 virtqueue_add_sgs+0x2e2/0x320 [ 7174.469413] 8) 5856 288 __virtblk_add_req+0xda/0x1b0 [ 7174.469413] 9) 5568 96 virtio_queue_rq+0xd3/0x1d0 [ 7174.469413] 10) 5472 128 __blk_mq_run_hw_queue+0x1ef/0x440 [ 7174.469413] 11) 5344 16 blk_mq_run_hw_queue+0x35/0x40 [ 7174.469413] 12) 5328 96 blk_mq_insert_requests+0xdb/0x160 [ 7174.469413] 13) 5232 112 blk_mq_flush_plug_list+0x12b/0x140 [ 7174.469413] 14) 5120 112 blk_flush_plug_list+0xc7/0x220 [ 7174.469413] 15) 5008 64 io_schedule_timeout+0x88/0x100 [ 7174.469413] 16) 4944 
128 mempool_alloc+0x145/0x170 [ 7174.469413] 17) 4816 96 bio_alloc_bioset+0x10b/0x1d0 [ 7174.469413] 18) 4720 48 get_swap_bio+0x30/0x90 [ 7174.469413] 19) 4672 160 __swap_writepage+0x150/0x230 [ 7174.469413] 20) 4512 32 swap_writepage+0x42/0x90 [ 7174.469413] 21) 4480 320 shrink_page_list+0x676/0xa80 [ 7174.469413] 22) 4160 208 shrink_inactive_list+0x262/0x4e0 [ 7174.469413] 23) 3952 304 shrink_lruvec+0x3e1/0x6a0 [ 7174.469413] 24) 3648 80 shrink_zone+0x3f/0x110 [ 7174.469413] 25) 3568 128 do_try_to_free_pages+0x156/0x4c0 [ 7174.469413] 26) 3440 208 try_to_free_pages+0xf7/0x1e0 [ 7174.469413] 27) 3232 352 __alloc_pages_nodemask+0x783/0xb20 [ 7174.469413] 28) 2880 8 alloc_pages_current+0x10f/0x1f0 [ 7174.469413] 29) 2872 200 __page_cache_alloc+0x13f/0x160 [ 7174.469413] 30) 2672 80 find_or_create_page+0x4c/0xb0 [ 7174.469413] 31) 2592 80 ext4_mb_load_buddy+0x1e9/0x370 [ 7174.469413] 32) 2512 176 ext4_mb_regular_allocator+0x1b7/0x460 [ 7174.469413] 33) 2336 128 ext4_mb_new_blocks+0x458/0x5f0 [ 7174.469413] 34) 2208 256 ext4_ext_map_blocks+0x70b/0x1010 [ 7174.469413] 35) 1952 160 ext4_map_blocks+0x325/0x530 [ 7174.469413] 36) 1792 384 ext4_writepages+0x6d1/0xce0 [ 7174.469413] 37) 1408 16 do_writepages+0x23/0x40 [ 7174.469413] 38) 1392 96 __writeback_single_inode+0x45/0x2e0 [ 7174.469413] 39) 1296 176 writeback_sb_inodes+0x2ad/0x500 [ 7174.469413] 40) 1120 80 __writeback_inodes_wb+0x9e/0xd0 [ 7174.469413] 41) 1040 160 wb_writeback+0x29b/0x350 [ 7174.469413] 42) 880 208 bdi_writeback_workfn+0x11c/0x480 [ 7174.469413] 43) 672 144 process_one_work+0x1d2/0x570 [ 7174.469413] 44) 528 112 worker_thread+0x116/0x370 [ 7174.469413] 45) 416 240 kthread+0xf3/0x110 [ 7174.469413] 46) 176 176 ret_from_fork+0x7c/0xb0 [ 7174.469413] ------------[ cut here ]------------ [ 7174.469413] kernel BUG at kernel/trace/trace_stack.c:174! 
[ 7174.469413] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC [ 7174.469413] Dumping ftrace buffer: [ 7174.469413] (ftrace buffer empty) [ 7174.469413] Modules linked in: [ 7174.469413] CPU: 0 PID: 440 Comm: kworker/u24:0 Not tainted 3.14.0+ #212 [ 7174.469413] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 7174.469413] Workqueue: writeback bdi_writeback_workfn (flush-253:0) [ 7174.469413] task: ffff880034170000 ti: ffff880029518000 task.ti: ffff880029518000 [ 7174.469413] RIP: 0010:[<ffffffff8112336e>] [<ffffffff8112336e>] stack_trace_call+0x2de/0x340 [ 7174.469413] RSP: 0000:ffff880029518290 EFLAGS: 00010046 [ 7174.469413] RAX: 0000000000000030 RBX: 000000000000002f RCX: 0000000000000000 [ 7174.469413] RDX: 0000000000000000 RSI: 000000000000002f RDI: ffffffff810b7159 [ 7174.469413] RBP: ffff8800295182f0 R08: ffffffffffffffff R09: 0000000000000000 [ 7174.469413] R10: 0000000000000001 R11: 0000000000000001 R12: ffffffff82768dfc [ 7174.469413] R13: 000000000000f2e8 R14: ffff8800295182b8 R15: 00000000000000f8 [ 7174.469413] FS: 0000000000000000(0000) GS:ffff880037c00000(0000) knlGS:0000000000000000 [ 7174.469413] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 7174.469413] CR2: 00002acd0b994000 CR3: 0000000001c0b000 CR4: 00000000000006f0 [ 7174.469413] Stack: [ 7174.469413] 0000000000000000 ffffffff8114fdb7 0000000000000087 0000000000001c50 [ 7174.469413] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 7174.469413] 0000000000000002 ffff880034170000 ffff880034171028 0000000000000000 [ 7174.469413] Call Trace: [ 7174.469413] [<ffffffff8114fdb7>] ? get_page_from_freelist+0xa7/0x920 [ 7174.469413] [<ffffffff816eee3f>] ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff81165065>] ? next_zones_zonelist+0x5/0x70 [ 7174.469413] [<ffffffff810a23fa>] ? __bfs+0x11a/0x270 [ 7174.469413] [<ffffffff81165065>] ? next_zones_zonelist+0x5/0x70 [ 7174.469413] [<ffffffff8114fdb7>] ? get_page_from_freelist+0xa7/0x920 [ 7174.469413] [<ffffffff8119092f>] ? 
alloc_pages_current+0x10f/0x1f0 [ 7174.469413] [<ffffffff811507fd>] __alloc_pages_nodemask+0x1cd/0xb20 [ 7174.469413] [<ffffffff810a4de6>] ? check_irq_usage+0x96/0xe0 [ 7174.469413] [<ffffffff816eee3f>] ? ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff8119092f>] alloc_pages_current+0x10f/0x1f0 [ 7174.469413] [<ffffffff81199cd5>] ? new_slab+0x2c5/0x370 [ 7174.469413] [<ffffffff81199cd5>] new_slab+0x2c5/0x370 [ 7174.469413] [<ffffffff816eee3f>] ? ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff816db002>] __slab_alloc+0x3a9/0x501 [ 7174.469413] [<ffffffff8119af8b>] ? __kmalloc+0x1cb/0x200 [ 7174.469413] [<ffffffff8141dc46>] ? vring_add_indirect+0x36/0x200 [ 7174.469413] [<ffffffff8141dc46>] ? vring_add_indirect+0x36/0x200 [ 7174.469413] [<ffffffff8141dc46>] ? vring_add_indirect+0x36/0x200 [ 7174.469413] [<ffffffff8119af8b>] __kmalloc+0x1cb/0x200 [ 7174.469413] [<ffffffff8141de10>] ? vring_add_indirect+0x200/0x200 [ 7174.469413] [<ffffffff8141dc46>] vring_add_indirect+0x36/0x200 [ 7174.469413] [<ffffffff8141e402>] virtqueue_add_sgs+0x2e2/0x320 [ 7174.469413] [<ffffffff8148e35a>] __virtblk_add_req+0xda/0x1b0 [ 7174.469413] [<ffffffff8148e503>] virtio_queue_rq+0xd3/0x1d0 [ 7174.469413] [<ffffffff8134aa0f>] __blk_mq_run_hw_queue+0x1ef/0x440 [ 7174.469413] [<ffffffff8134b0d5>] blk_mq_run_hw_queue+0x35/0x40 [ 7174.469413] [<ffffffff8134b7bb>] blk_mq_insert_requests+0xdb/0x160 [ 7174.469413] [<ffffffff8134be5b>] blk_mq_flush_plug_list+0x12b/0x140 [ 7174.469413] [<ffffffff81342237>] blk_flush_plug_list+0xc7/0x220 [ 7174.469413] [<ffffffff816e60ef>] ? _raw_spin_unlock_irqrestore+0x3f/0x70 [ 7174.469413] [<ffffffff816e16e8>] io_schedule_timeout+0x88/0x100 [ 7174.469413] [<ffffffff816e1665>] ? io_schedule_timeout+0x5/0x100 [ 7174.469413] [<ffffffff81149415>] mempool_alloc+0x145/0x170 [ 7174.469413] [<ffffffff8109baf0>] ? __init_waitqueue_head+0x60/0x60 [ 7174.469413] [<ffffffff811e246b>] bio_alloc_bioset+0x10b/0x1d0 [ 7174.469413] [<ffffffff81184230>] ? 
end_swap_bio_read+0xc0/0xc0 [ 7174.469413] [<ffffffff81184230>] ? end_swap_bio_read+0xc0/0xc0 [ 7174.469413] [<ffffffff81184110>] get_swap_bio+0x30/0x90 [ 7174.469413] [<ffffffff81184230>] ? end_swap_bio_read+0xc0/0xc0 [ 7174.469413] [<ffffffff81184660>] __swap_writepage+0x150/0x230 [ 7174.469413] [<ffffffff810ab405>] ? do_raw_spin_unlock+0x5/0xa0 [ 7174.469413] [<ffffffff81184230>] ? end_swap_bio_read+0xc0/0xc0 [ 7174.469413] [<ffffffff81184515>] ? __swap_writepage+0x5/0x230 [ 7174.469413] [<ffffffff81184782>] swap_writepage+0x42/0x90 [ 7174.469413] [<ffffffff8115ae96>] shrink_page_list+0x676/0xa80 [ 7174.469413] [<ffffffff816eee3f>] ? ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff8115b872>] shrink_inactive_list+0x262/0x4e0 [ 7174.469413] [<ffffffff8115c1c1>] shrink_lruvec+0x3e1/0x6a0 [ 7174.469413] [<ffffffff8115c4bf>] shrink_zone+0x3f/0x110 [ 7174.469413] [<ffffffff816eee3f>] ? ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff8115c9e6>] do_try_to_free_pages+0x156/0x4c0 [ 7174.469413] [<ffffffff8115cf47>] try_to_free_pages+0xf7/0x1e0 [ 7174.469413] [<ffffffff81150db3>] __alloc_pages_nodemask+0x783/0xb20 [ 7174.469413] [<ffffffff8119092f>] alloc_pages_current+0x10f/0x1f0 [ 7174.469413] [<ffffffff81145c0f>] ? __page_cache_alloc+0x13f/0x160 [ 7174.469413] [<ffffffff81145c0f>] __page_cache_alloc+0x13f/0x160 [ 7174.469413] [<ffffffff81146c6c>] find_or_create_page+0x4c/0xb0 [ 7174.469413] [<ffffffff811463e5>] ? find_get_page+0x5/0x130 [ 7174.469413] [<ffffffff812837b9>] ext4_mb_load_buddy+0x1e9/0x370 [ 7174.469413] [<ffffffff81284c07>] ext4_mb_regular_allocator+0x1b7/0x460 [ 7174.469413] [<ffffffff81281070>] ? ext4_mb_use_preallocated+0x40/0x360 [ 7174.469413] [<ffffffff816eee3f>] ? 
ftrace_call+0x5/0x2f [ 7174.469413] [<ffffffff81287eb8>] ext4_mb_new_blocks+0x458/0x5f0 [ 7174.469413] [<ffffffff8127d83b>] ext4_ext_map_blocks+0x70b/0x1010 [ 7174.469413] [<ffffffff8124e6d5>] ext4_map_blocks+0x325/0x530 [ 7174.469413] [<ffffffff81253871>] ext4_writepages+0x6d1/0xce0 [ 7174.469413] [<ffffffff812531a0>] ? ext4_journalled_write_end+0x330/0x330 [ 7174.469413] [<ffffffff811539b3>] do_writepages+0x23/0x40 [ 7174.469413] [<ffffffff811d2365>] __writeback_single_inode+0x45/0x2e0 [ 7174.469413] [<ffffffff811d36ed>] writeback_sb_inodes+0x2ad/0x500 [ 7174.469413] [<ffffffff811d39de>] __writeback_inodes_wb+0x9e/0xd0 [ 7174.469413] [<ffffffff811d40bb>] wb_writeback+0x29b/0x350 [ 7174.469413] [<ffffffff81057c3d>] ? __local_bh_enable_ip+0x6d/0xd0 [ 7174.469413] [<ffffffff811d6e9c>] bdi_writeback_workfn+0x11c/0x480 [ 7174.469413] [<ffffffff81070610>] ? process_one_work+0x170/0x570 [ 7174.469413] [<ffffffff81070672>] process_one_work+0x1d2/0x570 [ 7174.469413] [<ffffffff81070610>] ? process_one_work+0x170/0x570 [ 7174.469413] [<ffffffff81071bb6>] worker_thread+0x116/0x370 [ 7174.469413] [<ffffffff81071aa0>] ? manage_workers.isra.19+0x2e0/0x2e0 [ 7174.469413] [<ffffffff81078e53>] kthread+0xf3/0x110 [ 7174.469413] [<ffffffff81078d60>] ? flush_kthread_worker+0x150/0x150 [ 7174.469413] [<ffffffff816ef0ec>] ret_from_fork+0x7c/0xb0 [ 7174.469413] [<ffffffff81078d60>] ? flush_kthread_worker+0x150/0x150 [ 7174.469413] Code: c0 49 bc fc 8d 76 82 ff ff ff ff e8 44 5a 5b 00 31 f6 8b 05 95 2b b3 00 48 39 c6 7d 0e 4c 8b 04 f5 20 5f c5 81 49 83 f8 ff 75 11 <0f> 0b 48 63 05 71 5a 64 01 48 29 c3 e9 d0 fd ff ff 48 8d 5e 01 [ 7174.469413] RIP [<ffffffff8112336e>] stack_trace_call+0x2de/0x340 [ 7174.469413] RSP <ffff880029518290> [ 7174.469413] ---[ end trace c97d325b36b718f3 ]--- Link: http://lkml.kernel.org/p/1401683592-1651-1-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
		
			
				
	
	
		
			488 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			488 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 | 
						|
 *
 | 
						|
 */
 | 
						|
#include <linux/stacktrace.h>
 | 
						|
#include <linux/kallsyms.h>
 | 
						|
#include <linux/seq_file.h>
 | 
						|
#include <linux/spinlock.h>
 | 
						|
#include <linux/uaccess.h>
 | 
						|
#include <linux/debugfs.h>
 | 
						|
#include <linux/ftrace.h>
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/sysctl.h>
 | 
						|
#include <linux/init.h>
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/magic.h>
 | 
						|
 | 
						|
#include <asm/setup.h>
 | 
						|
 | 
						|
#include "trace.h"
 | 
						|
 | 
						|
/* Number of slots available for the recorded maximum-depth stack trace. */
#define STACK_TRACE_ENTRIES 500

/* fentry evaluates to 1 when CC_USING_FENTRY is defined, and to 0 otherwise. */
#if defined(CC_USING_FENTRY)
# define fentry		1
#else
# define fentry		0
#endif
 | 
						|
 | 
						|
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
 | 
						|
	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 | 
						|
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
 | 
						|
 | 
						|
/*
 | 
						|
 * Reserve one entry for the passed in ip. This will allow
 | 
						|
 * us to remove most or all of the stack size overhead
 | 
						|
 * added by the stack tracer itself.
 | 
						|
 */
 | 
						|
static struct stack_trace max_stack_trace = {
 | 
						|
	.max_entries		= STACK_TRACE_ENTRIES - 1,
 | 
						|
	.entries		= &stack_dump_trace[1],
 | 
						|
};
 | 
						|
 | 
						|
static unsigned long max_stack_size;
 | 
						|
static arch_spinlock_t max_stack_lock =
 | 
						|
	(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 | 
						|
 | 
						|
static DEFINE_PER_CPU(int, trace_active);
 | 
						|
static DEFINE_MUTEX(stack_sysctl_mutex);
 | 
						|
 | 
						|
int stack_tracer_enabled;
 | 
						|
static int last_stack_tracer_enabled;
 | 
						|
 | 
						|
static inline void print_max_stack(void)
 | 
						|
{
 | 
						|
	long i;
 | 
						|
	int size;
 | 
						|
 | 
						|
	pr_emerg("        Depth    Size   Location    (%d entries)\n"
 | 
						|
			   "        -----    ----   --------\n",
 | 
						|
			   max_stack_trace.nr_entries - 1);
 | 
						|
 | 
						|
	for (i = 0; i < max_stack_trace.nr_entries; i++) {
 | 
						|
		if (stack_dump_trace[i] == ULONG_MAX)
 | 
						|
			break;
 | 
						|
		if (i+1 == max_stack_trace.nr_entries ||
 | 
						|
				stack_dump_trace[i+1] == ULONG_MAX)
 | 
						|
			size = stack_dump_index[i];
 | 
						|
		else
 | 
						|
			size = stack_dump_index[i] - stack_dump_index[i+1];
 | 
						|
 | 
						|
		pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_dump_index[i],
 | 
						|
				size, (void *)stack_dump_trace[i]);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static inline void
 | 
						|
check_stack(unsigned long ip, unsigned long *stack)
 | 
						|
{
 | 
						|
	unsigned long this_size, flags; unsigned long *p, *top, *start;
 | 
						|
	static int tracer_frame;
 | 
						|
	int frame_size = ACCESS_ONCE(tracer_frame);
 | 
						|
	int i;
 | 
						|
 | 
						|
	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
 | 
						|
	this_size = THREAD_SIZE - this_size;
 | 
						|
	/* Remove the frame of the tracer */
 | 
						|
	this_size -= frame_size;
 | 
						|
 | 
						|
	if (this_size <= max_stack_size)
 | 
						|
		return;
 | 
						|
 | 
						|
	/* we do not handle interrupt stacks yet */
 | 
						|
	if (!object_is_on_stack(stack))
 | 
						|
		return;
 | 
						|
 | 
						|
	local_irq_save(flags);
 | 
						|
	arch_spin_lock(&max_stack_lock);
 | 
						|
 | 
						|
	/* In case another CPU set the tracer_frame on us */
 | 
						|
	if (unlikely(!frame_size))
 | 
						|
		this_size -= tracer_frame;
 | 
						|
 | 
						|
	/* a race could have already updated it */
 | 
						|
	if (this_size <= max_stack_size)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	max_stack_size = this_size;
 | 
						|
 | 
						|
	max_stack_trace.nr_entries = 0;
 | 
						|
 | 
						|
	if (using_ftrace_ops_list_func())
 | 
						|
		max_stack_trace.skip = 4;
 | 
						|
	else
 | 
						|
		max_stack_trace.skip = 3;
 | 
						|
 | 
						|
	save_stack_trace(&max_stack_trace);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Add the passed in ip from the function tracer.
 | 
						|
	 * Searching for this on the stack will skip over
 | 
						|
	 * most of the overhead from the stack tracer itself.
 | 
						|
	 */
 | 
						|
	stack_dump_trace[0] = ip;
 | 
						|
	max_stack_trace.nr_entries++;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Now find where in the stack these are.
 | 
						|
	 */
 | 
						|
	i = 0;
 | 
						|
	start = stack;
 | 
						|
	top = (unsigned long *)
 | 
						|
		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Loop through all the entries. One of the entries may
 | 
						|
	 * for some reason be missed on the stack, so we may
 | 
						|
	 * have to account for them. If they are all there, this
 | 
						|
	 * loop will only happen once. This code only takes place
 | 
						|
	 * on a new max, so it is far from a fast path.
 | 
						|
	 */
 | 
						|
	while (i < max_stack_trace.nr_entries) {
 | 
						|
		int found = 0;
 | 
						|
 | 
						|
		stack_dump_index[i] = this_size;
 | 
						|
		p = start;
 | 
						|
 | 
						|
		for (; p < top && i < max_stack_trace.nr_entries; p++) {
 | 
						|
			if (*p == stack_dump_trace[i]) {
 | 
						|
				this_size = stack_dump_index[i++] =
 | 
						|
					(top - p) * sizeof(unsigned long);
 | 
						|
				found = 1;
 | 
						|
				/* Start the search from here */
 | 
						|
				start = p + 1;
 | 
						|
				/*
 | 
						|
				 * We do not want to show the overhead
 | 
						|
				 * of the stack tracer stack in the
 | 
						|
				 * max stack. If we haven't figured
 | 
						|
				 * out what that is, then figure it out
 | 
						|
				 * now.
 | 
						|
				 */
 | 
						|
				if (unlikely(!tracer_frame) && i == 1) {
 | 
						|
					tracer_frame = (p - stack) *
 | 
						|
						sizeof(unsigned long);
 | 
						|
					max_stack_size -= tracer_frame;
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if (!found)
 | 
						|
			i++;
 | 
						|
	}
 | 
						|
 | 
						|
	if ((current != &init_task &&
 | 
						|
		*(end_of_stack(current)) != STACK_END_MAGIC)) {
 | 
						|
		print_max_stack();
 | 
						|
		BUG();
 | 
						|
	}
 | 
						|
 | 
						|
 out:
 | 
						|
	arch_spin_unlock(&max_stack_lock);
 | 
						|
	local_irq_restore(flags);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
stack_trace_call(unsigned long ip, unsigned long parent_ip,
 | 
						|
		 struct ftrace_ops *op, struct pt_regs *pt_regs)
 | 
						|
{
 | 
						|
	unsigned long stack;
 | 
						|
	int cpu;
 | 
						|
 | 
						|
	preempt_disable_notrace();
 | 
						|
 | 
						|
	cpu = raw_smp_processor_id();
 | 
						|
	/* no atomic needed, we only modify this variable by this cpu */
 | 
						|
	if (per_cpu(trace_active, cpu)++ != 0)
 | 
						|
		goto out;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * When fentry is used, the traced function does not get
 | 
						|
	 * its stack frame set up, and we lose the parent.
 | 
						|
	 * The ip is pretty useless because the function tracer
 | 
						|
	 * was called before that function set up its stack frame.
 | 
						|
	 * In this case, we use the parent ip.
 | 
						|
	 *
 | 
						|
	 * By adding the return address of either the parent ip
 | 
						|
	 * or the current ip we can disregard most of the stack usage
 | 
						|
	 * caused by the stack tracer itself.
 | 
						|
	 *
 | 
						|
	 * The function tracer always reports the address of where the
 | 
						|
	 * mcount call was, but the stack will hold the return address.
 | 
						|
	 */
 | 
						|
	if (fentry)
 | 
						|
		ip = parent_ip;
 | 
						|
	else
 | 
						|
		ip += MCOUNT_INSN_SIZE;
 | 
						|
 | 
						|
	check_stack(ip, &stack);
 | 
						|
 | 
						|
 out:
 | 
						|
	per_cpu(trace_active, cpu)--;
 | 
						|
	/* prevent recursion in schedule */
 | 
						|
	preempt_enable_notrace();
 | 
						|
}
 | 
						|
 | 
						|
/* ftrace_ops that hooks stack_trace_call() into the function tracer. */
static struct ftrace_ops trace_ops __read_mostly = {
	.flags	= FTRACE_OPS_FL_RECURSION_SAFE,
	.func	= stack_trace_call,
};
 | 
						|
 | 
						|
static ssize_t
 | 
						|
stack_max_size_read(struct file *filp, char __user *ubuf,
 | 
						|
		    size_t count, loff_t *ppos)
 | 
						|
{
 | 
						|
	unsigned long *ptr = filp->private_data;
 | 
						|
	char buf[64];
 | 
						|
	int r;
 | 
						|
 | 
						|
	r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
 | 
						|
	if (r > sizeof(buf))
 | 
						|
		r = sizeof(buf);
 | 
						|
	return simple_read_from_buffer(ubuf, count, ppos, buf, r);
 | 
						|
}
 | 
						|
 | 
						|
static ssize_t
 | 
						|
stack_max_size_write(struct file *filp, const char __user *ubuf,
 | 
						|
		     size_t count, loff_t *ppos)
 | 
						|
{
 | 
						|
	long *ptr = filp->private_data;
 | 
						|
	unsigned long val, flags;
 | 
						|
	int ret;
 | 
						|
	int cpu;
 | 
						|
 | 
						|
	ret = kstrtoul_from_user(ubuf, count, 10, &val);
 | 
						|
	if (ret)
 | 
						|
		return ret;
 | 
						|
 | 
						|
	local_irq_save(flags);
 | 
						|
 | 
						|
	/*
 | 
						|
	 * In case we trace inside arch_spin_lock() or after (NMI),
 | 
						|
	 * we will cause circular lock, so we also need to increase
 | 
						|
	 * the percpu trace_active here.
 | 
						|
	 */
 | 
						|
	cpu = smp_processor_id();
 | 
						|
	per_cpu(trace_active, cpu)++;
 | 
						|
 | 
						|
	arch_spin_lock(&max_stack_lock);
 | 
						|
	*ptr = val;
 | 
						|
	arch_spin_unlock(&max_stack_lock);
 | 
						|
 | 
						|
	per_cpu(trace_active, cpu)--;
 | 
						|
	local_irq_restore(flags);
 | 
						|
 | 
						|
	return count;
 | 
						|
}
 | 
						|
 | 
						|
static const struct file_operations stack_max_size_fops = {
 | 
						|
	.open		= tracing_open_generic,
 | 
						|
	.read		= stack_max_size_read,
 | 
						|
	.write		= stack_max_size_write,
 | 
						|
	.llseek		= default_llseek,
 | 
						|
};
 | 
						|
 | 
						|
static void *
 | 
						|
__next(struct seq_file *m, loff_t *pos)
 | 
						|
{
 | 
						|
	long n = *pos - 1;
 | 
						|
 | 
						|
	if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
 | 
						|
		return NULL;
 | 
						|
 | 
						|
	m->private = (void *)n;
 | 
						|
	return &m->private;
 | 
						|
}
 | 
						|
 | 
						|
static void *
 | 
						|
t_next(struct seq_file *m, void *v, loff_t *pos)
 | 
						|
{
 | 
						|
	(*pos)++;
 | 
						|
	return __next(m, pos);
 | 
						|
}
 | 
						|
 | 
						|
/*
 * seq_file .start: disable interrupts, raise this cpu's trace_active and
 * take max_stack_lock; t_stop() undoes all three.
 *
 * Position 0 yields SEQ_START_TOKEN (the header); any other position is
 * resolved through __next().
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	int this_cpu;

	local_irq_disable();

	this_cpu = smp_processor_id();
	per_cpu(trace_active, this_cpu)++;

	arch_spin_lock(&max_stack_lock);

	return *pos ? __next(m, pos) : SEQ_START_TOKEN;
}
 | 
						|
 | 
						|
static void t_stop(struct seq_file *m, void *p)
 | 
						|
{
 | 
						|
	int cpu;
 | 
						|
 | 
						|
	arch_spin_unlock(&max_stack_lock);
 | 
						|
 | 
						|
	cpu = smp_processor_id();
 | 
						|
	per_cpu(trace_active, cpu)--;
 | 
						|
 | 
						|
	local_irq_enable();
 | 
						|
}
 | 
						|
 | 
						|
static int trace_lookup_stack(struct seq_file *m, long i)
 | 
						|
{
 | 
						|
	unsigned long addr = stack_dump_trace[i];
 | 
						|
 | 
						|
	return seq_printf(m, "%pS\n", (void *)addr);
 | 
						|
}
 | 
						|
 | 
						|
/* Emit the notice that explains how to turn the stack tracer on. */
static void print_disabled(struct seq_file *m)
{
	static const char notice[] =
		"#\n"
		"#  Stack tracer disabled\n"
		"#\n"
		"# To enable the stack tracer, either add 'stacktrace' to the\n"
		"# kernel command line\n"
		"# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
		"#\n";

	seq_puts(m, notice);
}
 | 
						|
 | 
						|
static int t_show(struct seq_file *m, void *v)
 | 
						|
{
 | 
						|
	long i;
 | 
						|
	int size;
 | 
						|
 | 
						|
	if (v == SEQ_START_TOKEN) {
 | 
						|
		seq_printf(m, "        Depth    Size   Location"
 | 
						|
			   "    (%d entries)\n"
 | 
						|
			   "        -----    ----   --------\n",
 | 
						|
			   max_stack_trace.nr_entries - 1);
 | 
						|
 | 
						|
		if (!stack_tracer_enabled && !max_stack_size)
 | 
						|
			print_disabled(m);
 | 
						|
 | 
						|
		return 0;
 | 
						|
	}
 | 
						|
 | 
						|
	i = *(long *)v;
 | 
						|
 | 
						|
	if (i >= max_stack_trace.nr_entries ||
 | 
						|
	    stack_dump_trace[i] == ULONG_MAX)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	if (i+1 == max_stack_trace.nr_entries ||
 | 
						|
	    stack_dump_trace[i+1] == ULONG_MAX)
 | 
						|
		size = stack_dump_index[i];
 | 
						|
	else
 | 
						|
		size = stack_dump_index[i] - stack_dump_index[i+1];
 | 
						|
 | 
						|
	seq_printf(m, "%3ld) %8d   %5d   ", i, stack_dump_index[i], size);
 | 
						|
 | 
						|
	trace_lookup_stack(m, i);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
static const struct seq_operations stack_trace_seq_ops = {
 | 
						|
	.start		= t_start,
 | 
						|
	.next		= t_next,
 | 
						|
	.stop		= t_stop,
 | 
						|
	.show		= t_show,
 | 
						|
};
 | 
						|
 | 
						|
static int stack_trace_open(struct inode *inode, struct file *file)
 | 
						|
{
 | 
						|
	return seq_open(file, &stack_trace_seq_ops);
 | 
						|
}
 | 
						|
 | 
						|
static const struct file_operations stack_trace_fops = {
 | 
						|
	.open		= stack_trace_open,
 | 
						|
	.read		= seq_read,
 | 
						|
	.llseek		= seq_lseek,
 | 
						|
	.release	= seq_release,
 | 
						|
};
 | 
						|
 | 
						|
static int
 | 
						|
stack_trace_filter_open(struct inode *inode, struct file *file)
 | 
						|
{
 | 
						|
	return ftrace_regex_open(&trace_ops, FTRACE_ITER_FILTER,
 | 
						|
				 inode, file);
 | 
						|
}
 | 
						|
 | 
						|
static const struct file_operations stack_trace_filter_fops = {
 | 
						|
	.open = stack_trace_filter_open,
 | 
						|
	.read = seq_read,
 | 
						|
	.write = ftrace_filter_write,
 | 
						|
	.llseek = tracing_lseek,
 | 
						|
	.release = ftrace_regex_release,
 | 
						|
};
 | 
						|
 | 
						|
int
 | 
						|
stack_trace_sysctl(struct ctl_table *table, int write,
 | 
						|
		   void __user *buffer, size_t *lenp,
 | 
						|
		   loff_t *ppos)
 | 
						|
{
 | 
						|
	int ret;
 | 
						|
 | 
						|
	mutex_lock(&stack_sysctl_mutex);
 | 
						|
 | 
						|
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 | 
						|
 | 
						|
	if (ret || !write ||
 | 
						|
	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
 | 
						|
		goto out;
 | 
						|
 | 
						|
	last_stack_tracer_enabled = !!stack_tracer_enabled;
 | 
						|
 | 
						|
	if (stack_tracer_enabled)
 | 
						|
		register_ftrace_function(&trace_ops);
 | 
						|
	else
 | 
						|
		unregister_ftrace_function(&trace_ops);
 | 
						|
 | 
						|
 out:
 | 
						|
	mutex_unlock(&stack_sysctl_mutex);
 | 
						|
	return ret;
 | 
						|
}
 | 
						|
 | 
						|
static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;
 | 
						|
 | 
						|
static __init int enable_stacktrace(char *str)
 | 
						|
{
 | 
						|
	if (strncmp(str, "_filter=", 8) == 0)
 | 
						|
		strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
 | 
						|
 | 
						|
	stack_tracer_enabled = 1;
 | 
						|
	last_stack_tracer_enabled = 1;
 | 
						|
	return 1;
 | 
						|
}
 | 
						|
__setup("stacktrace", enable_stacktrace);
 | 
						|
 | 
						|
static __init int stack_trace_init(void)
 | 
						|
{
 | 
						|
	struct dentry *d_tracer;
 | 
						|
 | 
						|
	d_tracer = tracing_init_dentry();
 | 
						|
	if (!d_tracer)
 | 
						|
		return 0;
 | 
						|
 | 
						|
	trace_create_file("stack_max_size", 0644, d_tracer,
 | 
						|
			&max_stack_size, &stack_max_size_fops);
 | 
						|
 | 
						|
	trace_create_file("stack_trace", 0444, d_tracer,
 | 
						|
			NULL, &stack_trace_fops);
 | 
						|
 | 
						|
	trace_create_file("stack_trace_filter", 0444, d_tracer,
 | 
						|
			NULL, &stack_trace_filter_fops);
 | 
						|
 | 
						|
	if (stack_trace_filter_buf[0])
 | 
						|
		ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
 | 
						|
 | 
						|
	if (stack_tracer_enabled)
 | 
						|
		register_ftrace_function(&trace_ops);
 | 
						|
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
device_initcall(stack_trace_init);
 |