/*
   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.

   This file is subject to the terms and conditions of the GNU General Public
   License.  See the file "COPYING" in the main directory of this archive
   for more details.

   Tight version of memcpy for the case of just copying a page.
   Prefetch strategy empirically optimised against RTL simulations
   of SH5-101 cut2 eval chip with Cayman board DDR memory.

   Parameters:
   r2 : destination effective address (start of page)
   r3 : source effective address (start of page)

   Always copies 4096 bytes.

   Points to review.
   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
     It seems like the prefetch needs to be at least 4 lines ahead to get
     the data into the cache in time, and the allocos contend with outstanding
     prefetches for the same cache set, so it's better to have the numbers
     different.
   */

							|  |  |  | 	.section .text..SHmedia32,"ax" | 
					
						
							|  |  |  | 	.little | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	.balign 8
 | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	.global copy_page
 | 
					
						
							|  |  |  | copy_page: | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	/* Copy 4096 bytes worth of data from r3 to r2. | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	   Do prefetches 4 lines ahead. | 
					
						
							|  |  |  | 	   Do alloco 2 lines ahead */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	pta 1f, tr1 | 
					
						
							|  |  |  | 	pta 2f, tr2 | 
					
						
							|  |  |  | 	pta 3f, tr3 | 
					
						
							|  |  |  | 	ptabs r18, tr0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if 0 | 
					
						
							|  |  |  | 	/* TAKum03020 */ | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	ld.q r3, 0x00, r63 | 
					
						
							|  |  |  | 	ld.q r3, 0x20, r63 | 
					
						
							|  |  |  | 	ld.q r3, 0x40, r63 | 
					
						
							|  |  |  | 	ld.q r3, 0x60, r63 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	alloco r2, 0x00 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	synco		! TAKum03020 | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	alloco r2, 0x20 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	synco		! TAKum03020 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	movi 3968, r6 | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	add  r2, r6, r6 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	addi r6, 64, r7 | 
					
						
							|  |  |  | 	addi r7, 64, r8 | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	sub r3, r2, r60 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	addi r60, 8, r61 | 
					
						
							|  |  |  | 	addi r61, 8, r62 | 
					
						
							|  |  |  | 	addi r62, 8, r23 | 
					
						
							|  |  |  | 	addi r60, 0x80, r22 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Minimal code size.  The extra branches inside the loop don't cost much | 
					
						
							|  |  |  |    because they overlap with the time spent waiting for prefetches to | 
					
						
							|  |  |  |    complete. */ | 
					
						
							|  |  |  | 1: | 
					
						
							|  |  |  | #if 0 | 
					
						
							|  |  |  | 	/* TAKum03020 */ | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	bge/u r2, r6, tr2  ! skip prefetch for last 4 lines | 
					
						
							|  |  |  | 	ldx.q r2, r22, r63 ! prefetch 4 lines hence | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif | 
					
						
							|  |  |  | 2: | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	bge/u r2, r7, tr3  ! skip alloco for last 2 lines | 
					
						
							|  |  |  | 	alloco r2, 0x40    ! alloc destination line 2 lines ahead | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	synco		! TAKum03020 | 
					
						
							|  |  |  | 3: | 
					
						
							| 
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 |  |  | 	ldx.q r2, r60, r36 | 
					
						
							|  |  |  | 	ldx.q r2, r61, r37 | 
					
						
							|  |  |  | 	ldx.q r2, r62, r38 | 
					
						
							|  |  |  | 	ldx.q r2, r23, r39 | 
					
						
							|  |  |  | 	st.q  r2,   0, r36 | 
					
						
							|  |  |  | 	st.q  r2,   8, r37 | 
					
						
							|  |  |  | 	st.q  r2,  16, r38 | 
					
						
							|  |  |  | 	st.q  r2,  24, r39 | 
					
						
							|  |  |  | 	addi r2, 32, r2 | 
					
						
							|  |  |  | 	bgt/l r8, r2, tr1 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	blink tr0, r63	   ! return |