| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  *    Optimized memory copy routines. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *    Copyright (C) 2004 Randolph Chung <tausq@debian.org> | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *    This program is free software; you can redistribute it and/or modify | 
					
						
							|  |  |  |  *    it under the terms of the GNU General Public License as published by | 
					
						
							|  |  |  |  *    the Free Software Foundation; either version 2, or (at your option) | 
					
						
							|  |  |  |  *    any later version. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *    This program is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |  *    GNU General Public License for more details. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *    You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |  *    along with this program; if not, write to the Free Software | 
					
						
							|  |  |  |  *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *    Portions derived from the GNU C Library | 
					
						
							|  |  |  |  *    Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Several strategies are tried to try to get the best performance for various | 
					
						
							|  |  |  |  * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using  | 
					
						
							|  |  |  |  * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using | 
					
						
							|  |  |  |  * general registers.  Unaligned copies are handled either by aligning the  | 
					
						
							|  |  |  |  * destination and then using shift-and-write method, or in a few cases by  | 
					
						
							|  |  |  |  * falling back to a byte-at-a-time copy. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * I chose to implement this in C because it is easier to maintain and debug, | 
					
						
							|  |  |  |  * and in my experiments it appears that the C code generated by gcc (3.3/3.4 | 
					
						
							|  |  |  |  * at the time of writing) is fairly optimal. Unfortunately some of the  | 
					
						
							|  |  |  |  * semantics of the copy routine (exception handling) is difficult to express | 
					
						
							|  |  |  |  * in C, so we have to play some tricks to get it to work. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * All the loads and stores are done via explicit asm() code in order to use | 
					
						
							|  |  |  |  * the right space registers.  | 
					
						
							|  |  |  |  *  | 
					
						
							|  |  |  |  * Testing with various alignments and buffer sizes shows that this code is  | 
					
						
							|  |  |  |  * often >10x faster than a simple byte-at-a-time copy, even for strangely | 
					
						
							|  |  |  |  * aligned operands. It is interesting to note that the glibc version | 
					
						
							|  |  |  |  * of memcpy (written in C) is actually quite fast already. This routine is  | 
					
						
							|  |  |  |  * able to beat it by 30-40% for aligned copies because of the loop unrolling,  | 
					
						
							|  |  |  |  * but in some cases the glibc version is still slightly faster. This lends  | 
					
						
							|  |  |  |  * more credibility that gcc can generate very good code as long as we are  | 
					
						
							|  |  |  |  * careful. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * TODO: | 
					
						
							|  |  |  |  * - cache prefetching needs more experimentation to get optimal settings | 
					
						
							|  |  |  |  * - try not to use the post-increment address modifiers; they create additional | 
					
						
							|  |  |  |  *   interlocks | 
					
						
							|  |  |  |  * - replace byte-copy loops with stybs sequences | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __KERNEL__
 | 
					
						
							|  |  |  | #include <linux/module.h>
 | 
					
						
							|  |  |  | #include <linux/compiler.h>
 | 
					
						
							|  |  |  | #include <asm/uaccess.h>
 | 
					
						
							|  |  |  | #define s_space "%%sr1"
 | 
					
						
							|  |  |  | #define d_space "%%sr2"
 | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #include "memcpy.h"
 | 
					
						
							|  |  |  | #define s_space "%%sr0"
 | 
					
						
							|  |  |  | #define d_space "%%sr0"
 | 
					
						
							|  |  |  | #define pa_memcpy new2_copy
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | DECLARE_PER_CPU(struct exception_data, exception_data); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define preserve_branch(label)	do {					\
 | 
					
						
							| 
									
										
										
										
											2013-02-27 00:06:48 +01:00
										 |  |  | 	volatile int dummy = 0;						\ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	/* The following branch is never taken, it's just here to  */	\ | 
					
						
							|  |  |  | 	/* prevent gcc from optimizing away our exception code. */ 	\ | 
					
						
							|  |  |  | 	if (unlikely(dummy != dummy))					\ | 
					
						
							|  |  |  | 		goto label;						\ | 
					
						
							|  |  |  | } while (0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
 | 
					
						
							|  |  |  | #define get_kernel_space() (0)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define MERGE(w0, sh_1, w1, sh_2)  ({					\
 | 
					
						
							|  |  |  | 	unsigned int _r;						\ | 
					
						
							|  |  |  | 	asm volatile (							\ | 
					
						
							|  |  |  | 	"mtsar %3\n"							\ | 
					
						
							|  |  |  | 	"shrpw %1, %2, %%sar, %0\n"					\ | 
					
						
							|  |  |  | 	: "=r"(_r)							\ | 
					
						
							|  |  |  | 	: "r"(w0), "r"(w1), "r"(sh_2)					\ | 
					
						
							|  |  |  | 	);								\ | 
					
						
							|  |  |  | 	_r;								\ | 
					
						
							|  |  |  | }) | 
					
						
							|  |  |  | #define THRESHOLD	16
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef DEBUG_MEMCPY
 | 
					
						
							| 
									
										
										
										
											2008-05-14 16:21:55 -07:00
										 |  |  | #define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #else
 | 
					
						
							|  |  |  | #define DPRINTF(fmt, args...)
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e)	\
 | 
					
						
							|  |  |  | 	__asm__ __volatile__ (				\ | 
					
						
							| 
									
										
										
										
											2007-01-28 14:52:57 +01:00
										 |  |  | 	"1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t"	\ | 
					
						
							|  |  |  | 	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	: _tt(_t), "+r"(_a)				\ | 
					
						
							|  |  |  | 	: 						\ | 
					
						
							|  |  |  | 	: "r8") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) 	\
 | 
					
						
							|  |  |  | 	__asm__ __volatile__ (				\ | 
					
						
							| 
									
										
										
										
											2007-01-28 14:52:57 +01:00
										 |  |  | 	"1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t"	\ | 
					
						
							|  |  |  | 	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	: "+r"(_a) 					\ | 
					
						
							|  |  |  | 	: _tt(_t)					\ | 
					
						
							|  |  |  | 	: "r8") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | #define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | #define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | #define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | #define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | #define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) 	\
 | 
					
						
							|  |  |  | 	__asm__ __volatile__ (				\ | 
					
						
							| 
									
										
										
										
											2007-01-28 14:52:57 +01:00
										 |  |  | 	"1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t"	\ | 
					
						
							|  |  |  | 	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	: _tt(_t) 					\ | 
					
						
							|  |  |  | 	: "r"(_a)					\ | 
					
						
							|  |  |  | 	: "r8") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) 	\
 | 
					
						
							|  |  |  | 	__asm__ __volatile__ (				\ | 
					
						
							| 
									
										
										
										
											2007-01-28 14:52:57 +01:00
										 |  |  | 	"1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" 	\ | 
					
						
							|  |  |  | 	ASM_EXCEPTIONTABLE_ENTRY(1b,_e)			\ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	: 						\ | 
					
						
							|  |  |  | 	: _tt(_t), "r"(_a)				\ | 
					
						
							|  |  |  | 	: "r8") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define ldw(_s,_o,_a,_t,_e)	def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
 | 
					
						
							|  |  |  | #define stw(_s,_t,_o,_a,_e) 	def_store_insn(stw,"r",_s,_t,_o,_a,_e)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef  CONFIG_PREFETCH
 | 
					
						
							| 
									
										
										
										
											2007-10-16 14:24:59 -07:00
										 |  |  | static inline void prefetch_src(const void *addr) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	__asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2007-10-16 14:24:59 -07:00
										 |  |  | static inline void prefetch_dst(const void *addr) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	__asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2007-01-28 14:52:57 +01:00
										 |  |  | #define prefetch_src(addr) do { } while(0)
 | 
					
						
							|  |  |  | #define prefetch_dst(addr) do { } while(0)
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
 | 
					
						
							|  |  |  |  * per loop.  This code is derived from glibc.  | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static inline unsigned long copy_dstaligned(unsigned long dst, unsigned long src, unsigned long len, unsigned long o_dst, unsigned long o_src, unsigned long o_len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* gcc complains that a2 and a3 may be uninitialized, but actually
 | 
					
						
							|  |  |  | 	 * they cannot be.  Initialize a2/a3 to shut gcc up. | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	register unsigned int a0, a1, a2 = 0, a3 = 0; | 
					
						
							|  |  |  | 	int sh_1, sh_2; | 
					
						
							|  |  |  | 	struct exception_data *d; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* prefetch_src((const void *)src); */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Calculate how to shift a word read at the memory operation
 | 
					
						
							|  |  |  | 	   aligned srcp to make it aligned for copy.  */ | 
					
						
							|  |  |  | 	sh_1 = 8 * (src % sizeof(unsigned int)); | 
					
						
							|  |  |  | 	sh_2 = 8 * sizeof(unsigned int) - sh_1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Make src aligned by rounding it down.  */ | 
					
						
							|  |  |  | 	src &= -sizeof(unsigned int); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	switch (len % 4) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		case 2: | 
					
						
							|  |  |  | 			/* a1 = ((unsigned int *) src)[0];
 | 
					
						
							|  |  |  | 			   a2 = ((unsigned int *) src)[1]; */ | 
					
						
							|  |  |  | 			ldw(s_space, 0, src, a1, cda_ldw_exc); | 
					
						
							|  |  |  | 			ldw(s_space, 4, src, a2, cda_ldw_exc); | 
					
						
							|  |  |  | 			src -= 1 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			dst -= 3 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			len += 2; | 
					
						
							|  |  |  | 			goto do1; | 
					
						
							|  |  |  | 		case 3: | 
					
						
							|  |  |  | 			/* a0 = ((unsigned int *) src)[0];
 | 
					
						
							|  |  |  | 			   a1 = ((unsigned int *) src)[1]; */ | 
					
						
							|  |  |  | 			ldw(s_space, 0, src, a0, cda_ldw_exc); | 
					
						
							|  |  |  | 			ldw(s_space, 4, src, a1, cda_ldw_exc); | 
					
						
							|  |  |  | 			src -= 0 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			dst -= 2 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			len += 1; | 
					
						
							|  |  |  | 			goto do2; | 
					
						
							|  |  |  | 		case 0: | 
					
						
							|  |  |  | 			if (len == 0) | 
					
						
							|  |  |  | 				return 0; | 
					
						
							|  |  |  | 			/* a3 = ((unsigned int *) src)[0];
 | 
					
						
							|  |  |  | 			   a0 = ((unsigned int *) src)[1]; */ | 
					
						
							|  |  |  | 			ldw(s_space, 0, src, a3, cda_ldw_exc); | 
					
						
							|  |  |  | 			ldw(s_space, 4, src, a0, cda_ldw_exc); | 
					
						
							|  |  |  | 			src -=-1 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			dst -= 1 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			len += 0; | 
					
						
							|  |  |  | 			goto do3; | 
					
						
							|  |  |  | 		case 1: | 
					
						
							|  |  |  | 			/* a2 = ((unsigned int *) src)[0];
 | 
					
						
							|  |  |  | 			   a3 = ((unsigned int *) src)[1]; */ | 
					
						
							|  |  |  | 			ldw(s_space, 0, src, a2, cda_ldw_exc); | 
					
						
							|  |  |  | 			ldw(s_space, 4, src, a3, cda_ldw_exc); | 
					
						
							|  |  |  | 			src -=-2 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			dst -= 0 * sizeof(unsigned int); | 
					
						
							|  |  |  | 			len -= 1; | 
					
						
							|  |  |  | 			if (len == 0) | 
					
						
							|  |  |  | 				goto do0; | 
					
						
							|  |  |  | 			goto do4;			/* No-op.  */ | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	do | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		/* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ | 
					
						
							|  |  |  | do4: | 
					
						
							|  |  |  | 		/* a0 = ((unsigned int *) src)[0]; */ | 
					
						
							|  |  |  | 		ldw(s_space, 0, src, a0, cda_ldw_exc); | 
					
						
							|  |  |  | 		/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | 
					
						
							|  |  |  | 		stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | 
					
						
							|  |  |  | do3: | 
					
						
							|  |  |  | 		/* a1 = ((unsigned int *) src)[1]; */ | 
					
						
							|  |  |  | 		ldw(s_space, 4, src, a1, cda_ldw_exc); | 
					
						
							|  |  |  | 		/* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ | 
					
						
							|  |  |  | 		stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); | 
					
						
							|  |  |  | do2: | 
					
						
							|  |  |  | 		/* a2 = ((unsigned int *) src)[2]; */ | 
					
						
							|  |  |  | 		ldw(s_space, 8, src, a2, cda_ldw_exc); | 
					
						
							|  |  |  | 		/* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ | 
					
						
							|  |  |  | 		stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); | 
					
						
							|  |  |  | do1: | 
					
						
							|  |  |  | 		/* a3 = ((unsigned int *) src)[3]; */ | 
					
						
							|  |  |  | 		ldw(s_space, 12, src, a3, cda_ldw_exc); | 
					
						
							|  |  |  | 		/* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ | 
					
						
							|  |  |  | 		stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		src += 4 * sizeof(unsigned int); | 
					
						
							|  |  |  | 		dst += 4 * sizeof(unsigned int); | 
					
						
							|  |  |  | 		len -= 4; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	while (len != 0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | do0: | 
					
						
							|  |  |  | 	/* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ | 
					
						
							|  |  |  | 	stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	preserve_branch(handle_load_error); | 
					
						
							|  |  |  | 	preserve_branch(handle_store_error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | handle_load_error: | 
					
						
							|  |  |  | 	__asm__ __volatile__ ("cda_ldw_exc:\n"); | 
					
						
							|  |  |  | 	d = &__get_cpu_var(exception_data); | 
					
						
							|  |  |  | 	DPRINTF("cda_ldw_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", | 
					
						
							|  |  |  | 		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); | 
					
						
							|  |  |  | 	return o_len * 4 - d->fault_addr + o_src; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | handle_store_error: | 
					
						
							|  |  |  | 	__asm__ __volatile__ ("cda_stw_exc:\n"); | 
					
						
							|  |  |  | 	d = &__get_cpu_var(exception_data); | 
					
						
							|  |  |  | 	DPRINTF("cda_stw_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", | 
					
						
							|  |  |  | 		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); | 
					
						
							|  |  |  | 	return o_len * 4 - d->fault_addr + o_dst; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ | 
					
						
							| 
									
										
										
										
											2008-12-02 03:28:15 +00:00
										 |  |  | static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	register unsigned long src, dst, t1, t2, t3; | 
					
						
							|  |  |  | 	register unsigned char *pcs, *pcd; | 
					
						
							|  |  |  | 	register unsigned int *pws, *pwd; | 
					
						
							|  |  |  | 	register double *pds, *pdd; | 
					
						
							|  |  |  | 	unsigned long ret = 0; | 
					
						
							|  |  |  | 	unsigned long o_dst, o_src, o_len; | 
					
						
							|  |  |  | 	struct exception_data *d; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	src = (unsigned long)srcp; | 
					
						
							|  |  |  | 	dst = (unsigned long)dstp; | 
					
						
							|  |  |  | 	pcs = (unsigned char *)srcp; | 
					
						
							|  |  |  | 	pcd = (unsigned char *)dstp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	o_dst = dst; o_src = src; o_len = len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* prefetch_src((const void *)srcp); */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (len < THRESHOLD) | 
					
						
							|  |  |  | 		goto byte_copy; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Check alignment */ | 
					
						
							|  |  |  | 	t1 = (src ^ dst); | 
					
						
							|  |  |  | 	if (unlikely(t1 & (sizeof(double)-1))) | 
					
						
							|  |  |  | 		goto unaligned_copy; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* src and dst have same alignment. */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Copy bytes till we are double-aligned. */ | 
					
						
							|  |  |  | 	t2 = src & (sizeof(double) - 1); | 
					
						
							|  |  |  | 	if (unlikely(t2 != 0)) { | 
					
						
							|  |  |  | 		t2 = sizeof(double) - t2; | 
					
						
							|  |  |  | 		while (t2 && len) { | 
					
						
							|  |  |  | 			/* *pcd++ = *pcs++; */ | 
					
						
							|  |  |  | 			ldbma(s_space, pcs, t3, pmc_load_exc); | 
					
						
							|  |  |  | 			len--; | 
					
						
							|  |  |  | 			stbma(d_space, t3, pcd, pmc_store_exc); | 
					
						
							|  |  |  | 			t2--; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	pds = (double *)pcs; | 
					
						
							|  |  |  | 	pdd = (double *)pcd; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-10-21 22:48:34 -04:00
										 |  |  | #if 0
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 	/* Copy 8 doubles at a time */ | 
					
						
							|  |  |  | 	while (len >= 8*sizeof(double)) { | 
					
						
							|  |  |  | 		register double r1, r2, r3, r4, r5, r6, r7, r8; | 
					
						
							|  |  |  | 		/* prefetch_src((char *)pds + L1_CACHE_BYTES); */ | 
					
						
							|  |  |  | 		flddma(s_space, pds, r1, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r2, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r3, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r4, pmc_load_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r1, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r2, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r3, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r4, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | 		if (L1_CACHE_BYTES <= 32) | 
					
						
							|  |  |  | 			prefetch_src((char *)pds + L1_CACHE_BYTES); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 		flddma(s_space, pds, r5, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r6, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r7, pmc_load_exc); | 
					
						
							|  |  |  | 		flddma(s_space, pds, r8, pmc_load_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r5, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r6, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r7, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		fstdma(d_space, r8, pdd, pmc_store_exc); | 
					
						
							|  |  |  | 		len -= 8*sizeof(double); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2005-10-21 22:48:34 -04:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	pws = (unsigned int *)pds; | 
					
						
							|  |  |  | 	pwd = (unsigned int *)pdd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | word_copy: | 
					
						
							|  |  |  | 	while (len >= 8*sizeof(unsigned int)) { | 
					
						
							|  |  |  | 		register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; | 
					
						
							|  |  |  | 		/* prefetch_src((char *)pws + L1_CACHE_BYTES); */ | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r1, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r2, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r3, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r4, pmc_load_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r1, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r2, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r3, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r4, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r5, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r6, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r7, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r8, pmc_load_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r5, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r6, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r7, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r8, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		len -= 8*sizeof(unsigned int); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	while (len >= 4*sizeof(unsigned int)) { | 
					
						
							|  |  |  | 		register unsigned int r1,r2,r3,r4; | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r1, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r2, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r3, pmc_load_exc); | 
					
						
							|  |  |  | 		ldwma(s_space, pws, r4, pmc_load_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r1, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r2, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r3, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		stwma(d_space, r4, pwd, pmc_store_exc); | 
					
						
							|  |  |  | 		len -= 4*sizeof(unsigned int); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	pcs = (unsigned char *)pws; | 
					
						
							|  |  |  | 	pcd = (unsigned char *)pwd; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | byte_copy: | 
					
						
							|  |  |  | 	while (len) { | 
					
						
							|  |  |  | 		/* *pcd++ = *pcs++; */ | 
					
						
							|  |  |  | 		ldbma(s_space, pcs, t3, pmc_load_exc); | 
					
						
							|  |  |  | 		stbma(d_space, t3, pcd, pmc_store_exc); | 
					
						
							|  |  |  | 		len--; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | unaligned_copy: | 
					
						
							|  |  |  | 	/* possibly we are aligned on a word, but not on a double... */ | 
					
						
							| 
									
										
										
										
											2009-06-23 14:53:26 +00:00
										 |  |  | 	if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 		t2 = src & (sizeof(unsigned int) - 1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (unlikely(t2 != 0)) { | 
					
						
							|  |  |  | 			t2 = sizeof(unsigned int) - t2; | 
					
						
							|  |  |  | 			while (t2) { | 
					
						
							|  |  |  | 				/* *pcd++ = *pcs++; */ | 
					
						
							|  |  |  | 				ldbma(s_space, pcs, t3, pmc_load_exc); | 
					
						
							|  |  |  | 				stbma(d_space, t3, pcd, pmc_store_exc); | 
					
						
							|  |  |  | 				len--; | 
					
						
							|  |  |  | 				t2--; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		pws = (unsigned int *)pcs; | 
					
						
							|  |  |  | 		pwd = (unsigned int *)pcd; | 
					
						
							|  |  |  | 		goto word_copy; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Align the destination.  */ | 
					
						
							|  |  |  | 	if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { | 
					
						
							|  |  |  | 		t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); | 
					
						
							|  |  |  | 		while (t2) { | 
					
						
							|  |  |  | 			/* *pcd++ = *pcs++; */ | 
					
						
							|  |  |  | 			ldbma(s_space, pcs, t3, pmc_load_exc); | 
					
						
							|  |  |  | 			stbma(d_space, t3, pcd, pmc_store_exc); | 
					
						
							|  |  |  | 			len--; | 
					
						
							|  |  |  | 			t2--; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		dst = (unsigned long)pcd; | 
					
						
							|  |  |  | 		src = (unsigned long)pcs; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ret = copy_dstaligned(dst, src, len / sizeof(unsigned int),  | 
					
						
							|  |  |  | 		o_dst, o_src, o_len); | 
					
						
							|  |  |  | 	if (ret) | 
					
						
							|  |  |  | 		return ret; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	pcs += (len & -sizeof(unsigned int)); | 
					
						
							|  |  |  | 	pcd += (len & -sizeof(unsigned int)); | 
					
						
							|  |  |  | 	len %= sizeof(unsigned int); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	preserve_branch(handle_load_error); | 
					
						
							|  |  |  | 	preserve_branch(handle_store_error); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	goto byte_copy; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | handle_load_error: | 
					
						
							|  |  |  | 	__asm__ __volatile__ ("pmc_load_exc:\n"); | 
					
						
							|  |  |  | 	d = &__get_cpu_var(exception_data); | 
					
						
							|  |  |  | 	DPRINTF("pmc_load_exc: o_len=%lu fault_addr=%lu o_src=%lu ret=%lu\n", | 
					
						
							|  |  |  | 		o_len, d->fault_addr, o_src, o_len - d->fault_addr + o_src); | 
					
						
							|  |  |  | 	return o_len - d->fault_addr + o_src; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | handle_store_error: | 
					
						
							|  |  |  | 	__asm__ __volatile__ ("pmc_store_exc:\n"); | 
					
						
							|  |  |  | 	d = &__get_cpu_var(exception_data); | 
					
						
							|  |  |  | 	DPRINTF("pmc_store_exc: o_len=%lu fault_addr=%lu o_dst=%lu ret=%lu\n", | 
					
						
							|  |  |  | 		o_len, d->fault_addr, o_dst, o_len - d->fault_addr + o_dst); | 
					
						
							|  |  |  | 	return o_len - d->fault_addr + o_dst; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __KERNEL__
 | 
					
						
							|  |  |  | unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mtsp(get_kernel_space(), 1); | 
					
						
							|  |  |  | 	mtsp(get_user_space(), 2); | 
					
						
							|  |  |  | 	return pa_memcpy((void __force *)dst, src, len); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2010-02-01 19:56:33 +00:00
										 |  |  | EXPORT_SYMBOL(__copy_from_user); | 
					
						
							|  |  |  | unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long len) | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | { | 
					
						
							|  |  |  | 	mtsp(get_user_space(), 1); | 
					
						
							|  |  |  | 	mtsp(get_kernel_space(), 2); | 
					
						
							|  |  |  | 	return pa_memcpy(dst, (void __force *)src, len); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mtsp(get_user_space(), 1); | 
					
						
							|  |  |  | 	mtsp(get_user_space(), 2); | 
					
						
							|  |  |  | 	return pa_memcpy((void __force *)dst, (void __force *)src, len); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void * memcpy(void * dst,const void *src, size_t count) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	mtsp(get_kernel_space(), 1); | 
					
						
							|  |  |  | 	mtsp(get_kernel_space(), 2); | 
					
						
							|  |  |  | 	pa_memcpy(dst, src, count); | 
					
						
							|  |  |  | 	return dst; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | EXPORT_SYMBOL(copy_to_user); | 
					
						
							|  |  |  | EXPORT_SYMBOL(copy_from_user); | 
					
						
							|  |  |  | EXPORT_SYMBOL(copy_in_user); | 
					
						
							|  |  |  | EXPORT_SYMBOL(memcpy); | 
					
						
							|  |  |  | #endif
 |