 70ffdb9393
			
		
	
	
	70ffdb9393
	
	
	
		
			
			Introduce faulthandler_disabled() and use it to check for irq context and disabled pagefaults (via pagefault_disable()) in the pagefault handlers. Please note that we keep the in_atomic() checks in place - to detect whether in irq context (in which case preemption is always properly disabled). In contrast, preempt_disable() should never be used to disable pagefaults. With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt counter, and therefore the result of in_atomic() differs. We validate that condition by using might_fault() checks when calling might_sleep(). Therefore, add a comment to faulthandler_disabled(), describing why this is needed. faulthandler_disabled() and pagefault_disable() are defined in linux/uaccess.h, so let's properly add that include to all relevant files. This patch is based on a patch from Thomas Gleixner. Reviewed-and-tested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: airlied@linux.ie Cc: akpm@linux-foundation.org Cc: benh@kernel.crashing.org Cc: bigeasy@linutronix.de Cc: borntraeger@de.ibm.com Cc: daniel.vetter@intel.com Cc: heiko.carstens@de.ibm.com Cc: herbert@gondor.apana.org.au Cc: hocko@suse.cz Cc: hughd@google.com Cc: mst@redhat.com Cc: paulus@samba.org Cc: ralf@linux-mips.org Cc: schwidefsky@de.ibm.com Cc: yang.shi@windriver.com Link: http://lkml.kernel.org/r/1431359540-32227-7-git-send-email-dahi@linux.vnet.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
		
			
				
	
	
		
			468 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			468 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * fault.c:  Page fault handlers for the Sparc.
 | |
|  *
 | |
|  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
 | |
|  * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 | |
|  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 | |
|  */
 | |
| 
 | |
| #include <asm/head.h>
 | |
| 
 | |
| #include <linux/string.h>
 | |
| #include <linux/types.h>
 | |
| #include <linux/sched.h>
 | |
| #include <linux/ptrace.h>
 | |
| #include <linux/mman.h>
 | |
| #include <linux/threads.h>
 | |
| #include <linux/kernel.h>
 | |
| #include <linux/signal.h>
 | |
| #include <linux/mm.h>
 | |
| #include <linux/smp.h>
 | |
| #include <linux/perf_event.h>
 | |
| #include <linux/interrupt.h>
 | |
| #include <linux/kdebug.h>
 | |
| #include <linux/uaccess.h>
 | |
| 
 | |
| #include <asm/page.h>
 | |
| #include <asm/pgtable.h>
 | |
| #include <asm/openprom.h>
 | |
| #include <asm/oplib.h>
 | |
| #include <asm/setup.h>
 | |
| #include <asm/smp.h>
 | |
| #include <asm/traps.h>
 | |
| 
 | |
| #include "mm_32.h"
 | |
| 
 | |
| int show_unhandled_signals = 1; /* non-zero: __do_fault_siginfo() calls show_signal_msg() before forcing the signal */
 | |
| 
 | |
| static void __noreturn unhandled_fault(unsigned long address,
 | |
| 				       struct task_struct *tsk,
 | |
| 				       struct pt_regs *regs)
 | |
| {
 | |
| 	if ((unsigned long) address < PAGE_SIZE) {
 | |
| 		printk(KERN_ALERT
 | |
| 		    "Unable to handle kernel NULL pointer dereference\n");
 | |
| 	} else {
 | |
| 		printk(KERN_ALERT "Unable to handle kernel paging request at virtual address %08lx\n",
 | |
| 		       address);
 | |
| 	}
 | |
| 	printk(KERN_ALERT "tsk->{mm,active_mm}->context = %08lx\n",
 | |
| 		(tsk->mm ? tsk->mm->context : tsk->active_mm->context));
 | |
| 	printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %08lx\n",
 | |
| 		(tsk->mm ? (unsigned long) tsk->mm->pgd :
 | |
| 			(unsigned long) tsk->active_mm->pgd));
 | |
| 	die_if_kernel("Oops", regs);
 | |
| }
 | |
| 
 | |
| asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
 | |
| 			    unsigned long address)
 | |
| {
 | |
| 	struct pt_regs regs;
 | |
| 	unsigned long g2;
 | |
| 	unsigned int insn;
 | |
| 	int i;
 | |
| 
 | |
| 	i = search_extables_range(ret_pc, &g2);
 | |
| 	switch (i) {
 | |
| 	case 3:
 | |
| 		/* load & store will be handled by fixup */
 | |
| 		return 3;
 | |
| 
 | |
| 	case 1:
 | |
| 		/* store will be handled by fixup, load will bump out */
 | |
| 		/* for _to_ macros */
 | |
| 		insn = *((unsigned int *) pc);
 | |
| 		if ((insn >> 21) & 1)
 | |
| 			return 1;
 | |
| 		break;
 | |
| 
 | |
| 	case 2:
 | |
| 		/* load will be handled by fixup, store will bump out */
 | |
| 		/* for _from_ macros */
 | |
| 		insn = *((unsigned int *) pc);
 | |
| 		if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15)
 | |
| 			return 2;
 | |
| 		break;
 | |
| 
 | |
| 	default:
 | |
| 		break;
 | |
| 	}
 | |
| 
 | |
| 	memset(®s, 0, sizeof(regs));
 | |
| 	regs.pc = pc;
 | |
| 	regs.npc = pc + 4;
 | |
| 	__asm__ __volatile__(
 | |
| 		"rd %%psr, %0\n\t"
 | |
| 		"nop\n\t"
 | |
| 		"nop\n\t"
 | |
| 		"nop\n" : "=r" (regs.psr));
 | |
| 	unhandled_fault(address, current, ®s);
 | |
| 
 | |
| 	/* Not reached */
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static inline void
 | |
| show_signal_msg(struct pt_regs *regs, int sig, int code,
 | |
| 		unsigned long address, struct task_struct *tsk)
 | |
| {
 | |
| 	if (!unhandled_signal(tsk, sig))
 | |
| 		return;
 | |
| 
 | |
| 	if (!printk_ratelimit())
 | |
| 		return;
 | |
| 
 | |
| 	printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
 | |
| 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 | |
| 	       tsk->comm, task_pid_nr(tsk), address,
 | |
| 	       (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
 | |
| 	       (void *)regs->u_regs[UREG_FP], code);
 | |
| 
 | |
| 	print_vma_addr(KERN_CONT " in ", regs->pc);
 | |
| 
 | |
| 	printk(KERN_CONT "\n");
 | |
| }
 | |
| 
 | |
| static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
 | |
| 			       unsigned long addr)
 | |
| {
 | |
| 	siginfo_t info;
 | |
| 
 | |
| 	info.si_signo = sig;
 | |
| 	info.si_code = code;
 | |
| 	info.si_errno = 0;
 | |
| 	info.si_addr = (void __user *) addr;
 | |
| 	info.si_trapno = 0;
 | |
| 
 | |
| 	if (unlikely(show_unhandled_signals))
 | |
| 		show_signal_msg(regs, sig, info.si_code,
 | |
| 				addr, current);
 | |
| 
 | |
| 	force_sig_info (sig, &info, current);
 | |
| }
 | |
| 
 | |
| static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
 | |
| {
 | |
| 	unsigned int insn;
 | |
| 
 | |
| 	if (text_fault)
 | |
| 		return regs->pc;
 | |
| 
 | |
| 	if (regs->psr & PSR_PS)
 | |
| 		insn = *(unsigned int *) regs->pc;
 | |
| 	else
 | |
| 		__get_user(insn, (unsigned int *) regs->pc);
 | |
| 
 | |
| 	return safe_compute_effective_address(regs, insn);
 | |
| }
 | |
| 
 | |
| static noinline void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
 | |
| 				      int text_fault)
 | |
| {
 | |
| 	unsigned long addr = compute_si_addr(regs, text_fault);
 | |
| 
 | |
| 	__do_fault_siginfo(code, sig, regs, addr);
 | |
| }
 | |
| 
 | |
| asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 | |
| 			       unsigned long address)
 | |
| {
 | |
| 	struct vm_area_struct *vma;
 | |
| 	struct task_struct *tsk = current;
 | |
| 	struct mm_struct *mm = tsk->mm;
 | |
| 	unsigned int fixup;
 | |
| 	unsigned long g2;
 | |
| 	int from_user = !(regs->psr & PSR_PS);
 | |
| 	int fault, code;
 | |
| 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 | |
| 
 | |
| 	if (text_fault)
 | |
| 		address = regs->pc;
 | |
| 
 | |
| 	/*
 | |
| 	 * We fault-in kernel-space virtual memory on-demand. The
 | |
| 	 * 'reference' page table is init_mm.pgd.
 | |
| 	 *
 | |
| 	 * NOTE! We MUST NOT take any locks for this case. We may
 | |
| 	 * be in an interrupt or a critical region, and should
 | |
| 	 * only copy the information from the master page table,
 | |
| 	 * nothing more.
 | |
| 	 */
 | |
| 	code = SEGV_MAPERR;
 | |
| 	if (address >= TASK_SIZE)
 | |
| 		goto vmalloc_fault;
 | |
| 
 | |
| 	/*
 | |
| 	 * If we're in an interrupt or have no user
 | |
| 	 * context, we must not take the fault..
 | |
| 	 */
 | |
| 	if (pagefault_disabled() || !mm)
 | |
| 		goto no_context;
 | |
| 
 | |
| 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 | |
| 
 | |
| retry:
 | |
| 	down_read(&mm->mmap_sem);
 | |
| 
 | |
| 	if (!from_user && address >= PAGE_OFFSET)
 | |
| 		goto bad_area;
 | |
| 
 | |
| 	vma = find_vma(mm, address);
 | |
| 	if (!vma)
 | |
| 		goto bad_area;
 | |
| 	if (vma->vm_start <= address)
 | |
| 		goto good_area;
 | |
| 	if (!(vma->vm_flags & VM_GROWSDOWN))
 | |
| 		goto bad_area;
 | |
| 	if (expand_stack(vma, address))
 | |
| 		goto bad_area;
 | |
| 	/*
 | |
| 	 * Ok, we have a good vm_area for this memory access, so
 | |
| 	 * we can handle it..
 | |
| 	 */
 | |
| good_area:
 | |
| 	code = SEGV_ACCERR;
 | |
| 	if (write) {
 | |
| 		if (!(vma->vm_flags & VM_WRITE))
 | |
| 			goto bad_area;
 | |
| 	} else {
 | |
| 		/* Allow reads even for write-only mappings */
 | |
| 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 | |
| 			goto bad_area;
 | |
| 	}
 | |
| 
 | |
| 	if (from_user)
 | |
| 		flags |= FAULT_FLAG_USER;
 | |
| 	if (write)
 | |
| 		flags |= FAULT_FLAG_WRITE;
 | |
| 
 | |
| 	/*
 | |
| 	 * If for any reason at all we couldn't handle the fault,
 | |
| 	 * make sure we exit gracefully rather than endlessly redo
 | |
| 	 * the fault.
 | |
| 	 */
 | |
| 	fault = handle_mm_fault(mm, vma, address, flags);
 | |
| 
 | |
| 	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 | |
| 		return;
 | |
| 
 | |
| 	if (unlikely(fault & VM_FAULT_ERROR)) {
 | |
| 		if (fault & VM_FAULT_OOM)
 | |
| 			goto out_of_memory;
 | |
| 		else if (fault & VM_FAULT_SIGSEGV)
 | |
| 			goto bad_area;
 | |
| 		else if (fault & VM_FAULT_SIGBUS)
 | |
| 			goto do_sigbus;
 | |
| 		BUG();
 | |
| 	}
 | |
| 
 | |
| 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 | |
| 		if (fault & VM_FAULT_MAJOR) {
 | |
| 			current->maj_flt++;
 | |
| 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
 | |
| 				      1, regs, address);
 | |
| 		} else {
 | |
| 			current->min_flt++;
 | |
| 			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
 | |
| 				      1, regs, address);
 | |
| 		}
 | |
| 		if (fault & VM_FAULT_RETRY) {
 | |
| 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
 | |
| 			flags |= FAULT_FLAG_TRIED;
 | |
| 
 | |
| 			/* No need to up_read(&mm->mmap_sem) as we would
 | |
| 			 * have already released it in __lock_page_or_retry
 | |
| 			 * in mm/filemap.c.
 | |
| 			 */
 | |
| 
 | |
| 			goto retry;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	return;
 | |
| 
 | |
| 	/*
 | |
| 	 * Something tried to access memory that isn't in our memory map..
 | |
| 	 * Fix it, but check if it's kernel or user first..
 | |
| 	 */
 | |
| bad_area:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 
 | |
| bad_area_nosemaphore:
 | |
| 	/* User mode accesses just cause a SIGSEGV */
 | |
| 	if (from_user) {
 | |
| 		do_fault_siginfo(code, SIGSEGV, regs, text_fault);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* Is this in ex_table? */
 | |
| no_context:
 | |
| 	g2 = regs->u_regs[UREG_G2];
 | |
| 	if (!from_user) {
 | |
| 		fixup = search_extables_range(regs->pc, &g2);
 | |
| 		/* Values below 10 are reserved for other things */
 | |
| 		if (fixup > 10) {
 | |
| 			extern const unsigned __memset_start[];
 | |
| 			extern const unsigned __memset_end[];
 | |
| 			extern const unsigned __csum_partial_copy_start[];
 | |
| 			extern const unsigned __csum_partial_copy_end[];
 | |
| 
 | |
| #ifdef DEBUG_EXCEPTIONS
 | |
| 			printk("Exception: PC<%08lx> faddr<%08lx>\n",
 | |
| 			       regs->pc, address);
 | |
| 			printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
 | |
| 				regs->pc, fixup, g2);
 | |
| #endif
 | |
| 			if ((regs->pc >= (unsigned long)__memset_start &&
 | |
| 			     regs->pc < (unsigned long)__memset_end) ||
 | |
| 			    (regs->pc >= (unsigned long)__csum_partial_copy_start &&
 | |
| 			     regs->pc < (unsigned long)__csum_partial_copy_end)) {
 | |
| 				regs->u_regs[UREG_I4] = address;
 | |
| 				regs->u_regs[UREG_I5] = regs->pc;
 | |
| 			}
 | |
| 			regs->u_regs[UREG_G2] = g2;
 | |
| 			regs->pc = fixup;
 | |
| 			regs->npc = regs->pc + 4;
 | |
| 			return;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	unhandled_fault(address, tsk, regs);
 | |
| 	do_exit(SIGKILL);
 | |
| 
 | |
| /*
 | |
|  * We ran out of memory, or some other thing happened to us that made
 | |
|  * us unable to handle the page fault gracefully.
 | |
|  */
 | |
| out_of_memory:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	if (from_user) {
 | |
| 		pagefault_out_of_memory();
 | |
| 		return;
 | |
| 	}
 | |
| 	goto no_context;
 | |
| 
 | |
| do_sigbus:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, text_fault);
 | |
| 	if (!from_user)
 | |
| 		goto no_context;
 | |
| 
 | |
| vmalloc_fault:
 | |
| 	{
 | |
| 		/*
 | |
| 		 * Synchronize this task's top level page-table
 | |
| 		 * with the 'reference' page table.
 | |
| 		 */
 | |
| 		int offset = pgd_index(address);
 | |
| 		pgd_t *pgd, *pgd_k;
 | |
| 		pmd_t *pmd, *pmd_k;
 | |
| 
 | |
| 		pgd = tsk->active_mm->pgd + offset;
 | |
| 		pgd_k = init_mm.pgd + offset;
 | |
| 
 | |
| 		if (!pgd_present(*pgd)) {
 | |
| 			if (!pgd_present(*pgd_k))
 | |
| 				goto bad_area_nosemaphore;
 | |
| 			pgd_val(*pgd) = pgd_val(*pgd_k);
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		pmd = pmd_offset(pgd, address);
 | |
| 		pmd_k = pmd_offset(pgd_k, address);
 | |
| 
 | |
| 		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
 | |
| 			goto bad_area_nosemaphore;
 | |
| 
 | |
| 		*pmd = *pmd_k;
 | |
| 		return;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /* This always deals with user addresses. */
 | |
| static void force_user_fault(unsigned long address, int write)
 | |
| {
 | |
| 	struct vm_area_struct *vma;
 | |
| 	struct task_struct *tsk = current;
 | |
| 	struct mm_struct *mm = tsk->mm;
 | |
| 	unsigned int flags = FAULT_FLAG_USER;
 | |
| 	int code;
 | |
| 
 | |
| 	code = SEGV_MAPERR;
 | |
| 
 | |
| 	down_read(&mm->mmap_sem);
 | |
| 	vma = find_vma(mm, address);
 | |
| 	if (!vma)
 | |
| 		goto bad_area;
 | |
| 	if (vma->vm_start <= address)
 | |
| 		goto good_area;
 | |
| 	if (!(vma->vm_flags & VM_GROWSDOWN))
 | |
| 		goto bad_area;
 | |
| 	if (expand_stack(vma, address))
 | |
| 		goto bad_area;
 | |
| good_area:
 | |
| 	code = SEGV_ACCERR;
 | |
| 	if (write) {
 | |
| 		if (!(vma->vm_flags & VM_WRITE))
 | |
| 			goto bad_area;
 | |
| 		flags |= FAULT_FLAG_WRITE;
 | |
| 	} else {
 | |
| 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 | |
| 			goto bad_area;
 | |
| 	}
 | |
| 	switch (handle_mm_fault(mm, vma, address, flags)) {
 | |
| 	case VM_FAULT_SIGBUS:
 | |
| 	case VM_FAULT_OOM:
 | |
| 		goto do_sigbus;
 | |
| 	}
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	return;
 | |
| bad_area:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	__do_fault_siginfo(code, SIGSEGV, tsk->thread.kregs, address);
 | |
| 	return;
 | |
| 
 | |
| do_sigbus:
 | |
| 	up_read(&mm->mmap_sem);
 | |
| 	__do_fault_siginfo(BUS_ADRERR, SIGBUS, tsk->thread.kregs, address);
 | |
| }
 | |
| 
 | |
| static void check_stack_aligned(unsigned long sp)
 | |
| {
 | |
| 	if (sp & 0x7UL)
 | |
| 		force_sig(SIGILL, current);
 | |
| }
 | |
| 
 | |
| void window_overflow_fault(void)
 | |
| {
 | |
| 	unsigned long sp;
 | |
| 
 | |
| 	sp = current_thread_info()->rwbuf_stkptrs[0];
 | |
| 	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 | |
| 		force_user_fault(sp + 0x38, 1);
 | |
| 	force_user_fault(sp, 1);
 | |
| 
 | |
| 	check_stack_aligned(sp);
 | |
| }
 | |
| 
 | |
| void window_underflow_fault(unsigned long sp)
 | |
| {
 | |
| 	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 | |
| 		force_user_fault(sp + 0x38, 0);
 | |
| 	force_user_fault(sp, 0);
 | |
| 
 | |
| 	check_stack_aligned(sp);
 | |
| }
 | |
| 
 | |
| void window_ret_fault(struct pt_regs *regs)
 | |
| {
 | |
| 	unsigned long sp;
 | |
| 
 | |
| 	sp = regs->u_regs[UREG_FP];
 | |
| 	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
 | |
| 		force_user_fault(sp + 0x38, 0);
 | |
| 	force_user_fault(sp, 0);
 | |
| 
 | |
| 	check_stack_aligned(sp);
 | |
| }
 |