2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   This file is subject to the terms and conditions of the GNU General Public
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   License.  See the file "COPYING" in the main directory of this archive
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   for more details.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Tight version of mempy for the case of just copying a page.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Prefetch strategy empirically optimised against RTL simulations
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   of SH5-101 cut2 eval chip with Cayman board DDR memory.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Parameters:
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								   r2 : destination effective address (start of page)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   r3 : source effective address (start of page)
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Always copies 4096 bytes.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   Points to review.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   * Currently the prefetch is 4 lines ahead and the alloco is 2 lines ahead.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     It seems like the prefetch needs to be at at least 4 lines ahead to get
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     the data into the cache in time, and the allocos contend with outstanding
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     prefetches for the same cache set, so it's better to have the numbers
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     different.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.section .text..SHmedia32,"ax"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.little
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.balign 8
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.global copy_page
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								copy_page:
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									/* Copy 4096 bytes worth of data from r3 to r2.
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									   Do prefetches 4 lines ahead.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									   Do alloco 2 lines ahead */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									pta 1f, tr1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									pta 2f, tr2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									pta 3f, tr3
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ptabs r18, tr0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#if 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									/* TAKum03020 */
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									ld.q r3, 0x00, r63
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ld.q r3, 0x20, r63
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ld.q r3, 0x40, r63
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ld.q r3, 0x60, r63
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#endif
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									alloco r2, 0x00
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									synco		! TAKum03020
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									alloco r2, 0x20
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									synco		! TAKum03020
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									movi 3968, r6
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									add  r2, r6, r6
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r6, 64, r7
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r7, 64, r8
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									sub r3, r2, r60
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r60, 8, r61
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r61, 8, r62
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r62, 8, r23
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r60, 0x80, r22
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/* Minimal code size.  The extra branches inside the loop don't cost much
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   because they overlap with the time spent waiting for prefetches to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   complete. */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								1:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#if 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									/* TAKum03020 */
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									bge/u r2, r6, tr2  ! skip prefetch for last 4 lines
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ldx.q r2, r22, r63 ! prefetch 4 lines hence
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#endif
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								2:
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									bge/u r2, r7, tr3  ! skip alloco for last 2 lines
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									alloco r2, 0x40    ! alloc destination line 2 lines ahead
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									synco		! TAKum03020
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3:
							 | 
						
					
						
							
								
									
										
										
										
											2007-11-20 16:51:28 +09:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									ldx.q r2, r60, r36
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ldx.q r2, r61, r37
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ldx.q r2, r62, r38
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ldx.q r2, r23, r39
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									st.q  r2,   0, r36
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									st.q  r2,   8, r37
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									st.q  r2,  16, r38
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									st.q  r2,  24, r39
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									addi r2, 32, r2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									bgt/l r8, r2, tr1
							 | 
						
					
						
							
								
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									blink tr0, r63	   ! return
							 |