| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /* Copyright 2002 Andi Kleen */ | 
					
						
							| 
									
										
										
										
											2006-10-04 03:38:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | #include <linux/linkage.h> | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | #include <asm/cpufeature.h> | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | #include <asm/dwarf2.h> | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | /* | 
					
						
							|  |  |  |  * memcpy - Copy a memory block. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  |  * Input: | 
					
						
							|  |  |  |  *  rdi destination | 
					
						
							|  |  |  |  *  rsi source | 
					
						
							|  |  |  |  *  rdx count | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  |  * Output: | 
					
						
							|  |  |  |  * rax original destination | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2005-04-16 15:20:36 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | /* | 
					
						
							|  |  |  |  * memcpy_c() - fast string ops (REP MOVSQ) based variant. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2009-12-18 16:16:03 +00:00
										 |  |  |  * This gets patched over the unrolled variant (below) via the | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  |  * alternative instructions framework: | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2009-12-18 16:16:03 +00:00
										 |  |  | 	.section .altinstr_replacement, "ax", @progbits
 | 
					
						
							|  |  |  | .Lmemcpy_c: | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movq %rdi, %rax | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	movl %edx, %ecx | 
					
						
							|  |  |  | 	shrl $3, %ecx | 
					
						
							|  |  |  | 	andl $7, %edx | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 	rep movsq | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movl %edx, %ecx | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 	rep movsb | 
					
						
							|  |  |  | 	ret | 
					
						
							| 
									
										
										
										
											2009-12-18 16:16:03 +00:00
										 |  |  | .Lmemcpy_e: | 
					
						
							|  |  |  | 	.previous | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | ENTRY(__memcpy) | 
					
						
							|  |  |  | ENTRY(memcpy) | 
					
						
							|  |  |  | 	CFI_STARTPROC | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	/* | 
					
						
							|  |  |  | 	 * Put the number of full 64-byte blocks into %ecx. | 
					
						
							|  |  |  | 	 * Tail portion is handled at the end: | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	movq %rdi, %rax | 
					
						
							|  |  |  | 	movl %edx, %ecx | 
					
						
							|  |  |  | 	shrl   $6, %ecx | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	jz .Lhandle_tail | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	.p2align 4
 | 
					
						
							|  |  |  | .Lloop_64: | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	/* | 
					
						
							|  |  |  | 	 * We decrement the loop index here - and the zero-flag is | 
					
						
							|  |  |  | 	 * checked at the end of the loop (instructions inbetween do | 
					
						
							|  |  |  | 	 * not change the zero flag): | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	decl %ecx | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	/* | 
					
						
							|  |  |  | 	 * Move in blocks of 4x16 bytes: | 
					
						
							|  |  |  | 	 */ | 
					
						
							|  |  |  | 	movq 0*8(%rsi),		%r11 | 
					
						
							|  |  |  | 	movq 1*8(%rsi),		%r8 | 
					
						
							|  |  |  | 	movq %r11,		0*8(%rdi) | 
					
						
							|  |  |  | 	movq %r8,		1*8(%rdi) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movq 2*8(%rsi),		%r9 | 
					
						
							|  |  |  | 	movq 3*8(%rsi),		%r10 | 
					
						
							|  |  |  | 	movq %r9,		2*8(%rdi) | 
					
						
							|  |  |  | 	movq %r10,		3*8(%rdi) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movq 4*8(%rsi),		%r11 | 
					
						
							|  |  |  | 	movq 5*8(%rsi),		%r8 | 
					
						
							|  |  |  | 	movq %r11,		4*8(%rdi) | 
					
						
							|  |  |  | 	movq %r8,		5*8(%rdi) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movq 6*8(%rsi),		%r9 | 
					
						
							|  |  |  | 	movq 7*8(%rsi),		%r10 | 
					
						
							|  |  |  | 	movq %r9,		6*8(%rdi) | 
					
						
							|  |  |  | 	movq %r10,		7*8(%rdi) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	leaq 64(%rsi), %rsi | 
					
						
							|  |  |  | 	leaq 64(%rdi), %rdi | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	jnz  .Lloop_64 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .Lhandle_tail: | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movl %edx, %ecx | 
					
						
							|  |  |  | 	andl  $63, %ecx | 
					
						
							|  |  |  | 	shrl   $3, %ecx | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	jz   .Lhandle_7 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	.p2align 4
 | 
					
						
							|  |  |  | .Lloop_8: | 
					
						
							|  |  |  | 	decl %ecx | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movq (%rsi),		%r8 | 
					
						
							|  |  |  | 	movq %r8,		(%rdi) | 
					
						
							|  |  |  | 	leaq 8(%rdi),		%rdi | 
					
						
							|  |  |  | 	leaq 8(%rsi),		%rsi | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	jnz  .Lloop_8 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .Lhandle_7: | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movl %edx, %ecx | 
					
						
							|  |  |  | 	andl $7, %ecx | 
					
						
							|  |  |  | 	jz .Lend | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	.p2align 4
 | 
					
						
							|  |  |  | .Lloop_1: | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	movb (%rsi), %r8b | 
					
						
							|  |  |  | 	movb %r8b, (%rdi) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	incq %rdi | 
					
						
							|  |  |  | 	incq %rsi | 
					
						
							|  |  |  | 	decl %ecx | 
					
						
							|  |  |  | 	jnz .Lloop_1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | .Lend: | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	ret | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 	CFI_ENDPROC | 
					
						
							|  |  |  | ENDPROC(memcpy) | 
					
						
							|  |  |  | ENDPROC(__memcpy) | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	/* | 
					
						
							|  |  |  | 	 * Some CPUs run faster using the string copy instructions. | 
					
						
							|  |  |  | 	 * It is also a lot simpler. Use this when possible: | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 	.section .altinstructions, "a" | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	.align 8
 | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 	.quad memcpy
 | 
					
						
							| 
									
										
										
										
											2009-12-18 16:16:03 +00:00
										 |  |  | 	.quad .Lmemcpy_c | 
					
						
							| 
									
										
										
										
											2006-09-26 10:52:32 +02:00
										 |  |  | 	.byte X86_FEATURE_REP_GOOD
 | 
					
						
							| 
									
										
										
										
											2009-03-12 12:20:17 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* | 
					
						
							|  |  |  | 	 * Replace only beginning, memcpy is used to apply alternatives, | 
					
						
							|  |  |  | 	 * so it is silly to overwrite itself with nops - reboot is the | 
					
						
							|  |  |  | 	 * only outcome... | 
					
						
							|  |  |  | 	 */ | 
					
						
							| 
									
										
										
										
											2009-12-18 16:16:03 +00:00
										 |  |  | 	.byte .Lmemcpy_e - .Lmemcpy_c | 
					
						
							|  |  |  | 	.byte .Lmemcpy_e - .Lmemcpy_c | 
					
						
							| 
									
										
										
										
											2006-02-03 21:51:02 +01:00
										 |  |  | 	.previous |