 45cac65b0f
			
		
	
	
	45cac65b0f
	
	
	
		
			
			.fault can now retry, and the retry can break the state machine of .fault. In filemap_fault, if the page is missing, ra->mmap_miss is increased. On the second try, since the page is now in the page cache, ra->mmap_miss is decreased. Both of these happen within one fault, so we can't detect random mmap file access. Add a new flag to indicate that .fault has already been tried once. On the second try, skip decreasing ra->mmap_miss. The filemap_fault state machine is fine with this. I only tested x86 and didn't test other archs; the change for the other archs looks obvious, but who knows :) Signed-off-by: Shaohua Li <shaohua.li@fusionio.com> Cc: Rik van Riel <riel@redhat.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			264 lines
		
	
	
	
		
			6.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			264 lines
		
	
	
	
		
			6.1 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Copyright (C) 2004-2006 Atmel Corporation
 | |
|  *
 | |
|  * Based on linux/arch/sh/mm/fault.c:
 | |
|  *   Copyright (C) 1999  Niibe Yutaka
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License version 2 as
 | |
|  * published by the Free Software Foundation.
 | |
|  */
 | |
| 
 | |
| #include <linux/mm.h>
 | |
| #include <linux/module.h>
 | |
| #include <linux/pagemap.h>
 | |
| #include <linux/kdebug.h>
 | |
| #include <linux/kprobes.h>
 | |
| 
 | |
| #include <asm/mmu_context.h>
 | |
| #include <asm/sysreg.h>
 | |
| #include <asm/tlb.h>
 | |
| #include <asm/uaccess.h>
 | |
| 
 | |
/*
 * Offer a fault to kprobes before the regular fault handling runs.
 *
 * @regs: register state of the faulting context.
 * @trap: trap number handed through to kprobe_fault_handler().
 *
 * Returns 1 when the fault was not taken in user mode, a kprobe is
 * currently running and kprobe_fault_handler() returned non-zero.
 * Returns 0 in every other case, and always when CONFIG_KPROBES is
 * not set.
 */
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
#ifdef CONFIG_KPROBES
	/* Faults taken in user mode are never passed to kprobes. */
	if (user_mode(regs))
		return 0;

	if (!kprobe_running())
		return 0;

	return kprobe_fault_handler(regs, trap) ? 1 : 0;
#else
	return 0;
#endif
}
 | |
| 
 | |
| int exception_trace = 1;
 | |
| 
 | |
| /*
 | |
|  * This routine handles page faults. It determines the address and the
 | |
|  * problem, and then passes it off to one of the appropriate routines.
 | |
|  *
 | |
|  * ecr is the Exception Cause Register. Possible values are:
 | |
|  *   6:  Protection fault (instruction access)
 | |
|  *   15: Protection fault (read access)
 | |
|  *   16: Protection fault (write access)
 | |
|  *   20: Page not found (instruction access)
 | |
|  *   24: Page not found (read access)
 | |
|  *   28: Page not found (write access)
 | |
|  */
 | |
| asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
 | |
| {
 | |
| 	struct task_struct *tsk;
 | |
| 	struct mm_struct *mm;
 | |
| 	struct vm_area_struct *vma;
 | |
| 	const struct exception_table_entry *fixup;
 | |
| 	unsigned long address;
 | |
| 	unsigned long page;
 | |
| 	long signr;
 | |
| 	int code;
 | |
| 	int fault;
 | |
| 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 | |
| 
 | |
| 	if (notify_page_fault(regs, ecr))
 | |
| 		return;
 | |
| 
 | |
| 	address = sysreg_read(TLBEAR);
 | |
| 
 | |
| 	tsk = current;
 | |
| 	mm = tsk->mm;
 | |
| 
 | |
| 	signr = SIGSEGV;
 | |
| 	code = SEGV_MAPERR;
 | |
| 
 | |
| 	/*
 | |
| 	 * If we're in an interrupt or have no user context, we must
 | |
| 	 * not take the fault...
 | |
| 	 */
 | |
| 	if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
 | |
| 		goto no_context;
 | |
| 
 | |
| 	local_irq_enable();
 | |
| 
 | |
| retry:
 | |
| 	down_read(&mm->mmap_sem);
 | |
| 
 | |
| 	vma = find_vma(mm, address);
 | |
| 	if (!vma)
 | |
| 		goto bad_area;
 | |
| 	if (vma->vm_start <= address)
 | |
| 		goto good_area;
 | |
| 	if (!(vma->vm_flags & VM_GROWSDOWN))
 | |
| 		goto bad_area;
 | |
| 	if (expand_stack(vma, address))
 | |
| 		goto bad_area;
 | |
| 
 | |
| 	/*
 | |
| 	 * Ok, we have a good vm_area for this memory access, so we
 | |
| 	 * can handle it...
 | |
| 	 */
 | |
| good_area:
 | |
| 	code = SEGV_ACCERR;
 | |
| 
 | |
| 	switch (ecr) {
 | |
| 	case ECR_PROTECTION_X:
 | |
| 	case ECR_TLB_MISS_X:
 | |
| 		if (!(vma->vm_flags & VM_EXEC))
 | |
| 			goto bad_area;
 | |
| 		break;
 | |
| 	case ECR_PROTECTION_R:
 | |
| 	case ECR_TLB_MISS_R:
 | |
| 		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
 | |
| 			goto bad_area;
 | |
| 		break;
 | |
| 	case ECR_PROTECTION_W:
 | |
| 	case ECR_TLB_MISS_W:
 | |
| 		if (!(vma->vm_flags & VM_WRITE))
 | |
| 			goto bad_area;
 | |
| 		flags |= FAULT_FLAG_WRITE;
 | |
| 		break;
 | |
| 	default:
 | |
| 		panic("Unhandled case %lu in do_page_fault!", ecr);
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * If for any reason at all we couldn't handle the fault, make
 | |
| 	 * sure we exit gracefully rather than endlessly redo the
 | |
| 	 * fault.
 | |
| 	 */
 | |
| 	fault = handle_mm_fault(mm, vma, address, flags);
 | |
| 
 | |
| 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 | |
| 		return;
 | |
| 
 | |
| 	if (unlikely(fault & VM_FAULT_ERROR)) {
 | |
| 		if (fault & VM_FAULT_OOM)
 | |
| 			goto out_of_memory;
 | |
| 		else if (fault & VM_FAULT_SIGBUS)
 | |
| 			goto do_sigbus;
 | |
| 		BUG();
 | |
| 	}
 | |
| 
 | |
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 | |
| 		if (fault & VM_FAULT_MAJOR)
 | |
| 			tsk->maj_flt++;
 | |
| 		else
 | |
| 			tsk->min_flt++;
 | |
| 		if (fault & VM_FAULT_RETRY) {
 | |
| 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
 | |
| 			flags |= FAULT_FLAG_TRIED;
 | |
| 
 | |
| 			/*
 | |
| 			 * No need to up_read(&mm->mmap_sem) as we would have
 | |
| 			 * already released it in __lock_page_or_retry() in
 | |
| 			 * mm/filemap.c.
 | |
| 			 */
 | |
| 			goto retry;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	return;
 | |
| 
 | |
| 	/*
 | |
| 	 * Something tried to access memory that isn't in our memory
 | |
| 	 * map. Fix it, but check if it's kernel or user first...
 | |
| 	 */
 | |
| bad_area:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 
 | |
| 	if (user_mode(regs)) {
 | |
| 		if (exception_trace && printk_ratelimit())
 | |
| 			printk("%s%s[%d]: segfault at %08lx pc %08lx "
 | |
| 			       "sp %08lx ecr %lu\n",
 | |
| 			       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 | |
| 			       tsk->comm, tsk->pid, address, regs->pc,
 | |
| 			       regs->sp, ecr);
 | |
| 		_exception(SIGSEGV, regs, code, address);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| no_context:
 | |
| 	/* Are we prepared to handle this kernel fault? */
 | |
| 	fixup = search_exception_tables(regs->pc);
 | |
| 	if (fixup) {
 | |
| 		regs->pc = fixup->fixup;
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * Oops. The kernel tried to access some bad page. We'll have
 | |
| 	 * to terminate things with extreme prejudice.
 | |
| 	 */
 | |
| 	if (address < PAGE_SIZE)
 | |
| 		printk(KERN_ALERT
 | |
| 		       "Unable to handle kernel NULL pointer dereference");
 | |
| 	else
 | |
| 		printk(KERN_ALERT
 | |
| 		       "Unable to handle kernel paging request");
 | |
| 	printk(" at virtual address %08lx\n", address);
 | |
| 
 | |
| 	page = sysreg_read(PTBR);
 | |
| 	printk(KERN_ALERT "ptbr = %08lx", page);
 | |
| 	if (address >= TASK_SIZE)
 | |
| 		page = (unsigned long)swapper_pg_dir;
 | |
| 	if (page) {
 | |
| 		page = ((unsigned long *)page)[address >> 22];
 | |
| 		printk(" pgd = %08lx", page);
 | |
| 		if (page & _PAGE_PRESENT) {
 | |
| 			page &= PAGE_MASK;
 | |
| 			address &= 0x003ff000;
 | |
| 			page = ((unsigned long *)__va(page))[address >> PAGE_SHIFT];
 | |
| 			printk(" pte = %08lx", page);
 | |
| 		}
 | |
| 	}
 | |
| 	printk("\n");
 | |
| 	die("Kernel access of bad area", regs, signr);
 | |
| 	return;
 | |
| 
 | |
| 	/*
 | |
| 	 * We ran out of memory, or some other thing happened to us
 | |
| 	 * that made us unable to handle the page fault gracefully.
 | |
| 	 */
 | |
| out_of_memory:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	pagefault_out_of_memory();
 | |
| 	if (!user_mode(regs))
 | |
| 		goto no_context;
 | |
| 	return;
 | |
| 
 | |
| do_sigbus:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 
 | |
| 	/* Kernel mode? Handle exceptions or die */
 | |
| 	signr = SIGBUS;
 | |
| 	code = BUS_ADRERR;
 | |
| 	if (!user_mode(regs))
 | |
| 		goto no_context;
 | |
| 
 | |
| 	if (exception_trace)
 | |
| 		printk("%s%s[%d]: bus error at %08lx pc %08lx "
 | |
| 		       "sp %08lx ecr %lu\n",
 | |
| 		       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 | |
| 		       tsk->comm, tsk->pid, address, regs->pc,
 | |
| 		       regs->sp, ecr);
 | |
| 
 | |
| 	_exception(SIGBUS, regs, BUS_ADRERR, address);
 | |
| }
 | |
| 
 | |
| asmlinkage void do_bus_error(unsigned long addr, int write_access,
 | |
| 			     struct pt_regs *regs)
 | |
| {
 | |
| 	printk(KERN_ALERT
 | |
| 	       "Bus error at physical address 0x%08lx (%s access)\n",
 | |
| 	       addr, write_access ? "write" : "read");
 | |
| 	printk(KERN_INFO "DTLB dump:\n");
 | |
| 	dump_dtlb();
 | |
| 	die("Bus Error", regs, SIGKILL);
 | |
| }
 |