sparc64: sun4v TLB error power off events
We've witnessed a few TLB events causing the machine to power off because
of prom_halt(). In one case it was some NFS-related area during rmmod. Another
was an mmapper of /dev/mem. A more recent one is an ITLB issue with
a bad page size, which could be a hardware bug. Bugs happen, but we should
attempt not to power off or hang the machine when possible.
This is a DTLB error from an mmapper of /dev/mem:
[root@sparcie ~]# SUN4V-DTLB: Error at TPC[fffff80100903e6c], tl 1
SUN4V-DTLB: TPC<0xfffff80100903e6c>
SUN4V-DTLB: O7[fffff801081979d0]
SUN4V-DTLB: O7<0xfffff801081979d0>
SUN4V-DTLB: vaddr[fffff80100000000] ctx[1250] pte[98000000000f0610] error[2]
.
This is an ITLB error from recent mainline:
[ 3708.179864] SUN4V-ITLB: TPC<0xfffffc010071cefc>
[ 3708.188866] SUN4V-ITLB: O7[fffffc010071cee8]
[ 3708.197377] SUN4V-ITLB: O7<0xfffffc010071cee8>
[ 3708.206539] SUN4V-ITLB: vaddr[e0003] ctx[1a3c] pte[2900000dcc800eeb] error[4]
.
Normally sun4v_itlb_error_report() and sun4v_dtlb_error_report() would call
prom_halt() and drop us to the OF "ok" command prompt. This isn't the case for
LDOMs, where the machine powers off instead.
For the HV-reported error HV_ENORADDR from HV_MMU_MAP_ADDR_TRAP, we cause
a SIGBUS by checking for the FAULT_CODE_BAD_RA fault code mask within
do_sparc64_fault(). This is done when the trap level (%tl) is less than or
equal to one. Otherwise, for %tl > 1, we eventually proceed to die_if_kernel().
The logic of this patch was partially inspired by David Miller's feedback.
Powering off large sparc64 machines is painful. Plus, die_if_kernel() provides
more context. A reset sequence isn't brief on a large sparc64 machine, but it
is better than a power-off/power-on sequence.
Cc: sparclinux@vger.kernel.org
Signed-off-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
	
	
This commit is contained in:
		
					parent
					
						
							
								d1105287aa
							
						
					
				
			
			
				commit
				
					
						4ccb927289
					
				
			
		
					 4 changed files with 34 additions and 20 deletions
				
			
		|  | @ -102,6 +102,7 @@ struct thread_info { | |||
| #define FAULT_CODE_ITLB		0x04	/* Miss happened in I-TLB	   */ | ||||
| #define FAULT_CODE_WINFIXUP	0x08	/* Miss happened during spill/fill */ | ||||
| #define FAULT_CODE_BLKCOMMIT	0x10	/* Use blk-commit ASI in copy_page */ | ||||
| #define	FAULT_CODE_BAD_RA	0x20	/* Bad RA for sun4v		   */ | ||||
| 
 | ||||
| #if PAGE_SHIFT == 13 | ||||
| #define THREAD_SIZE (2*PAGE_SIZE) | ||||
|  |  | |||
|  | @ -195,6 +195,11 @@ sun4v_tsb_miss_common: | |||
| 	 ldx	[%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 | ||||
| 
 | ||||
| sun4v_itlb_error: | ||||
| 	rdpr	%tl, %g1 | ||||
| 	cmp	%g1, 1 | ||||
| 	ble,pt	%icc, sun4v_bad_ra | ||||
| 	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 | ||||
| 
 | ||||
| 	sethi	%hi(sun4v_err_itlb_vaddr), %g1 | ||||
| 	stx	%g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] | ||||
| 	sethi	%hi(sun4v_err_itlb_ctx), %g1 | ||||
|  | @ -206,15 +211,10 @@ sun4v_itlb_error: | |||
| 	sethi	%hi(sun4v_err_itlb_error), %g1 | ||||
| 	stx	%o0, [%g1 + %lo(sun4v_err_itlb_error)] | ||||
| 
 | ||||
| 	sethi	%hi(1f), %g7 | ||||
| 	rdpr	%tl, %g4 | ||||
| 	cmp	%g4, 1 | ||||
| 	ble,pt	%icc, 1f | ||||
| 	 sethi	%hi(2f), %g7 | ||||
| 	ba,pt	%xcc, etraptl1 | ||||
| 	 or	%g7, %lo(2f), %g7 | ||||
| 
 | ||||
| 1:	ba,pt	%xcc, etrap | ||||
| 2:	 or	%g7, %lo(2b), %g7 | ||||
| 1:	 or	%g7, %lo(1f), %g7 | ||||
| 	mov	%l4, %o1 | ||||
| 	call	sun4v_itlb_error_report | ||||
| 	 add	%sp, PTREGS_OFF, %o0 | ||||
|  | @ -222,6 +222,11 @@ sun4v_itlb_error: | |||
| 	/* NOTREACHED */ | ||||
| 
 | ||||
| sun4v_dtlb_error: | ||||
| 	rdpr	%tl, %g1 | ||||
| 	cmp	%g1, 1 | ||||
| 	ble,pt	%icc, sun4v_bad_ra | ||||
| 	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 | ||||
| 
 | ||||
| 	sethi	%hi(sun4v_err_dtlb_vaddr), %g1 | ||||
| 	stx	%g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] | ||||
| 	sethi	%hi(sun4v_err_dtlb_ctx), %g1 | ||||
|  | @ -233,21 +238,23 @@ sun4v_dtlb_error: | |||
| 	sethi	%hi(sun4v_err_dtlb_error), %g1 | ||||
| 	stx	%o0, [%g1 + %lo(sun4v_err_dtlb_error)] | ||||
| 
 | ||||
| 	sethi	%hi(1f), %g7 | ||||
| 	rdpr	%tl, %g4 | ||||
| 	cmp	%g4, 1 | ||||
| 	ble,pt	%icc, 1f | ||||
| 	 sethi	%hi(2f), %g7 | ||||
| 	ba,pt	%xcc, etraptl1 | ||||
| 	 or	%g7, %lo(2f), %g7 | ||||
| 
 | ||||
| 1:	ba,pt	%xcc, etrap | ||||
| 2:	 or	%g7, %lo(2b), %g7 | ||||
| 1:	 or	%g7, %lo(1f), %g7 | ||||
| 	mov	%l4, %o1 | ||||
| 	call	sun4v_dtlb_error_report | ||||
| 	 add	%sp, PTREGS_OFF, %o0 | ||||
| 
 | ||||
| 	/* NOTREACHED */ | ||||
| 
 | ||||
| sun4v_bad_ra: | ||||
| 	or	%g0, %g4, %g5 | ||||
| 	ba,pt	%xcc, sparc64_realfault_common | ||||
| 	 or	%g1, %g0, %g4 | ||||
| 
 | ||||
| 	/* NOTREACHED */ | ||||
| 
 | ||||
| 	/* Instruction Access Exception, tl0. */ | ||||
| sun4v_iacc: | ||||
| 	ldxa	[%g0] ASI_SCRATCHPAD, %g2 | ||||
|  |  | |||
|  | @ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) | |||
| 	atomic_inc(&sun4v_nonresum_oflow_cnt); | ||||
| } | ||||
| 
 | ||||
| static void sun4v_tlb_error(struct pt_regs *regs) | ||||
| { | ||||
| 	die_if_kernel("TLB/TSB error", regs); | ||||
| } | ||||
| 
 | ||||
| unsigned long sun4v_err_itlb_vaddr; | ||||
| unsigned long sun4v_err_itlb_ctx; | ||||
| unsigned long sun4v_err_itlb_pte; | ||||
|  | @ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error; | |||
| 
 | ||||
| void sun4v_itlb_error_report(struct pt_regs *regs, int tl) | ||||
| { | ||||
| 	if (tl > 1) | ||||
| 		dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||||
| 	dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||||
| 
 | ||||
| 	printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", | ||||
| 	       regs->tpc, tl); | ||||
|  | @ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) | |||
| 	       sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, | ||||
| 	       sun4v_err_itlb_pte, sun4v_err_itlb_error); | ||||
| 
 | ||||
| 	prom_halt(); | ||||
| 	sun4v_tlb_error(regs); | ||||
| } | ||||
| 
 | ||||
| unsigned long sun4v_err_dtlb_vaddr; | ||||
|  | @ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error; | |||
| 
 | ||||
| void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) | ||||
| { | ||||
| 	if (tl > 1) | ||||
| 		dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||||
| 	dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||||
| 
 | ||||
| 	printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", | ||||
| 	       regs->tpc, tl); | ||||
|  | @ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) | |||
| 	       sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, | ||||
| 	       sun4v_err_dtlb_pte, sun4v_err_dtlb_error); | ||||
| 
 | ||||
| 	prom_halt(); | ||||
| 	sun4v_tlb_error(regs); | ||||
| } | ||||
| 
 | ||||
| void hypervisor_tlbop_error(unsigned long err, unsigned long op) | ||||
|  |  | |||
|  | @ -346,6 +346,9 @@ retry: | |||
| 		down_read(&mm->mmap_sem); | ||||
| 	} | ||||
| 
 | ||||
| 	if (fault_code & FAULT_CODE_BAD_RA) | ||||
| 		goto do_sigbus; | ||||
| 
 | ||||
| 	vma = find_vma(mm, address); | ||||
| 	if (!vma) | ||||
| 		goto bad_area; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bob picco
				bob picco