77 lines
		
	
	
	
		
			1.9 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			77 lines
		
	
	
	
		
			1.9 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/*
 * Copyright (C) 1999-2002 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
 *
 * 1/06/01 davidm	Tuned for Itanium.
 * 2/12/02 kchen	Tuned for both Itanium and McKinley
 * 3/08/02 davidm	Some more tweaking
 */
|  | 
 | ||
|  | #include <asm/asmmacro.h> | ||
|  | #include <asm/page.h> | ||
|  | 
 | ||
/*
 * Per-CPU tuning constants for clear_page.
 *
 * L3_LINE_SIZE   - bytes per L3 cache line; used as the post-increment of
 *                  the dst_fetch store stream and to derive the main loop
 *                  count (PAGE_SIZE / L3_LINE_SIZE).
 * PREFETCH_LINES - number of lines the dst_fetch stream touches before the
 *                  main loop starts, i.e. how far it runs ahead of the
 *                  other store pointers (tuned value, see changelog).
 */
#ifdef CONFIG_ITANIUM
# define L3_LINE_SIZE	64	// Itanium L3 line size
# define PREFETCH_LINES	9	// lines touched ahead of the main loop (tuned)
#else
# define L3_LINE_SIZE	128	// McKinley L3 line size
# define PREFETCH_LINES	12	// lines touched ahead of the main loop (tuned)
#endif

// Register aliases used by clear_page.
#define saved_lc	r2	// copy of the caller's ar.lc, restored on exit
#define dst_fetch	r3	// run-ahead store pointer, one store per L3 line
#define dst1		r8	// store pointer, initialised to in0 + 16
#define dst2		r9	// store pointer, initialised to in0 + 32
#define dst3		r10	// store pointer, initialised to in0 + 48
#define dst4		r11	// store pointer, initialised to in0 + 64

#define dst_last	r31	// bound for dst_fetch: stores stop once reached
|  | 
 | ||
/*
 * clear_page - fill one page with zeros.
 *
 * In:	 in0 = address of the page (sole input register, see .regstk)
 * Uses: r16, saved_lc, dst_fetch, dst1-dst4, dst_last, p8, ar.lc
 *	 (ar.lc is copied to saved_lc on entry and restored before return)
 *
 * All stores have the form "stf.spill.nta [ptr] = f0, inc": f0 (which reads
 * as 0.0 on IA-64) is spilled to [ptr] and ptr is post-incremented by inc.
 * Two kinds of store stream cooperate:
 *
 *   - dst_fetch writes offset 0 of every L3 line.  It is started first and
 *     runs PREFETCH_LINES lines ahead of the other pointers.
 *   - dst1..dst4 write the remaining slots of each line at 16-byte spacing
 *     (offsets 16/32/48 on Itanium, 16..112 on McKinley).
 *
 * The order of instructions and ";;" stops is part of the tuning; keep it
 * intact when editing.
 */
GLOBAL_ENTRY(clear_page)
	.prologue
	.regstk 1,0,0,0
	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
	.save ar.lc, saved_lc			// unwind info: ar.lc is kept in saved_lc
	mov saved_lc = ar.lc

	.body
	mov ar.lc = (PREFETCH_LINES - 1)	// .fetch loop runs PREFETCH_LINES times
	mov dst_fetch = in0
	adds dst1 = 16, in0
	adds dst2 = 32, in0
	;;
	// Warm-up: write the first slot of the first PREFETCH_LINES lines so
	// that dst_fetch is that many lines ahead when the main loop starts.
.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
	adds dst3 = 48, in0		// executing this multiple times is harmless
	br.cloop.sptk.few .fetch
	;;
	// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, hence
	// dst_last = in0 + PAGE_SIZE (first byte past the page).
	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
	mov ar.lc = r16			// one L3 line per iteration
	adds dst4 = 64, in0
	;;
#ifdef CONFIG_ITANIUM
	// Optimized for Itanium
	// 64-byte line: offsets 16 and 32 here, 48 and the run-ahead store below.
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead stream still inside the page
	;;
#else
	// Optimized for McKinley
	// 128-byte line: offsets 16/32/48/64 in the first group, 80/96/112
	// in the second (dst1-dst3 advance by 64 twice, dst4 by 128 once).
1:	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
	stf.spill.nta [dst3] = f0, 64
	stf.spill.nta [dst4] = f0, 128
	cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead stream still inside the page
	;;
	stf.spill.nta [dst1] = f0, 64
	stf.spill.nta [dst2] = f0, 64
#endif
	// Common tail of both variants.
	stf.spill.nta [dst3] = f0, 64
(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE	// skipped once dst_fetch reaches dst_last
	br.cloop.sptk.few 1b
	;;
	mov ar.lc = saved_lc		// restore lc
	br.ret.sptk.many rp
END(clear_page)