77 lines
		
	
	
	
		
			1.9 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			77 lines
		
	
	
	
		
			1.9 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| 
								 | 
							
								/*
								 * Copyright (C) 1999-2002 Hewlett-Packard Co
								 *	Stephane Eranian <eranian@hpl.hp.com>
								 *	David Mosberger-Tang <davidm@hpl.hp.com>
								 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
								 *
								 * 1/06/01 davidm	Tuned for Itanium.
								 * 2/12/02 kchen	Tuned for both Itanium and McKinley
								 * 3/08/02 davidm	Some more tweaking
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <asm/asmmacro.h>
							 | 
						||
| 
								 | 
							
								#include <asm/page.h>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * CPU-dependent tuning constants used by clear_page:
								 *
								 *   L3_LINE_SIZE    L3 cache line size in bytes
								 *                   (64 when CONFIG_ITANIUM is set, otherwise 128
								 *                   for McKinley).
								 *   PREFETCH_LINES  number of lines the dst_fetch pointer runs
								 *                   ahead of the main store pointers.  The values
								 *                   are tuning "magic numbers"; no derivation is
								 *                   recorded in this file.
								 */
								#if defined(CONFIG_ITANIUM)
								# define L3_LINE_SIZE	64
								# define PREFETCH_LINES	9
								#else
								# define L3_LINE_SIZE	128
								# define PREFETCH_LINES	12
								#endif
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * Register roles inside clear_page.
								 */

								/* bookkeeping */
								#define saved_lc	r2	/* copy of ar.lc taken on entry, written back before return */
								#define dst_fetch	r3	/* run-ahead store pointer, advances by L3_LINE_SIZE */
								#define dst_last	r31	/* upper bound that dst_fetch is compared against */

								/* main store pointers, initialised relative to in0 */
								#define dst1		r8	/* in0 + 16 */
								#define dst2		r9	/* in0 + 32 */
								#define dst3		r10	/* in0 + 48 */
								#define dst4		r11	/* in0 + 64 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * clear_page -- fill one page by storing f0 over it.
								 *
								 * Input:  in0 = start address of the page (the single input
								 *               register declared by .regstk).
								 * Uses:   saved_lc, dst_fetch, dst1..dst4, dst_last, r16, p8.
								 *         ar.lc is copied to saved_lc on entry and written back
								 *         before returning.
								 *
								 * Every store is "stf.spill.nta [ptr] = f0, step".  The pointers
								 * are spaced 16 bytes apart (per the IA-64 ISA, stf.spill writes
								 * a 16-byte register image and f0 reads as +0.0).
								 *
								 * Phase 1 (.prime): PREFETCH_LINES stores through dst_fetch, one
								 *   at the start of each of the first PREFETCH_LINES lines.
								 *   Afterwards dst_fetch = in0 + PREFETCH_LINES*L3_LINE_SIZE and
								 *   dst_last is set to in0 + PAGE_SIZE.
								 *
								 * Phase 2 (1:): PAGE_SIZE/L3_LINE_SIZE iterations, one L3 line
								 *   each.  dst1..dst3 (plus dst4 on non-Itanium builds) cover
								 *   offsets 16 .. L3_LINE_SIZE-16 of the current line; while
								 *   dst_fetch < dst_last, dst_fetch additionally stores to
								 *   offset 0 of the line PREFETCH_LINES ahead.
								 *
								 * The two CPU variants share the first two stores and the loop
								 * tail; only the McKinley-specific instructions are conditional.
								 * Instruction order and stop (;;) placement are tuned -- do not
								 * reorder without measuring.
								 */
								GLOBAL_ENTRY(clear_page)
									.prologue
									.regstk 1,0,0,0
									mov r16 = (PAGE_SIZE/L3_LINE_SIZE) - 1	// phase-2 trip count; br.cloop is repeat/until, hence -1
									.save ar.lc, saved_lc
									mov saved_lc = ar.lc			// remember caller's loop counter

									.body
									mov ar.lc = PREFETCH_LINES - 1		// phase 1 executes PREFETCH_LINES times
									mov dst_fetch = in0
									adds dst1 = 16, in0
									adds dst2 = 32, in0
									;;
								.prime:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
									adds dst3 = 48, in0			// loop-invariant; recomputing it each pass is harmless
									br.cloop.sptk.few .prime
									;;
									addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch	// = in0 + PAGE_SIZE
									mov ar.lc = r16				// phase 2: one L3 line per iteration
									adds dst4 = 64, in0
									;;
								1:	stf.spill.nta [dst1] = f0, 64		// common to both CPU variants
									stf.spill.nta [dst2] = f0, 64
								#ifndef CONFIG_ITANIUM
									// McKinley (128-byte line): two more stores in the first group
									stf.spill.nta [dst3] = f0, 64
									stf.spill.nta [dst4] = f0, 128
								#endif
									cmp.lt p8,p0=dst_fetch, dst_last	// p8: run-ahead pointer still below dst_last
									;;
								#ifndef CONFIG_ITANIUM
									// McKinley: second half of the 128-byte line
									stf.spill.nta [dst1] = f0, 64
									stf.spill.nta [dst2] = f0, 64
								#endif
									stf.spill.nta [dst3] = f0, 64
								(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
									br.cloop.sptk.few 1b
									;;
									mov ar.lc = saved_lc			// put caller's loop counter back
									br.ret.sptk.many rp
								END(clear_page)
							 |