The many stray whitespace characters and other bits of uncleanliness made this code almost unreadable to me - so fix those up. No changes to the actual code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/errno.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
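
/*
 * Caller-side sketch of that contract, in rough C (illustrative only,
 * not the kernel's actual wrapper code; the names here are hypothetical):
 *
 *	int src_err = 0, dst_err = 0;
 *	unsigned int sum;
 *
 *	sum = csum_partial_copy_generic(src, dst, len, init_sum,
 *					&src_err, &dst_err);
 *	if (src_err || dst_err)
 *		memset(dst, 0, len);	// zero dst, substitute an error sum
 *
 * Either error pointer may be NULL when that side cannot fault; the
 * exception handlers at the end of this file check for that.
 */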

	.macro source
10:
	.section __ex_table, "a"
	.align 8
	.quad 10b, .Lbad_source
	.previous
	.endm

	.macro dest
20:
	.section __ex_table, "a"
	.align 8
	.quad 20b, .Lbad_dest
	.previous
	.endm

	.macro ignore L=.Lignore
30:
	.section __ex_table, "a"
	.align 8
	.quad 30b, \L
	.previous
	.endm
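
/*
 * How these macros work: each plants a local label on the instruction
 * that may fault and records an (insn, fixup) address pair in the
 * __ex_table section.  On a fault the kernel searches this table for
 * the faulting RIP and, if an entry matches, resumes at the fixup
 * address instead of oopsing.  Each pair of .quads above corresponds
 * roughly to:
 *
 *	struct exception_table_entry {
 *		unsigned long insn;	// address of faulting instruction
 *		unsigned long fixup;	// continue execution here
 *	};
 *
 * (a sketch of the x86-64 layout implied by the 16-byte entries above)
 */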

ENTRY(csum_partial_copy_generic)
	CFI_STARTPROC
	cmpl	$3*64, %edx
	jle	.Lignore

.Lignore:
	subq  $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET 7*8
	movq  %rbx, 2*8(%rsp)
	CFI_REL_OFFSET rbx, 2*8
	movq  %r12, 3*8(%rsp)
	CFI_REL_OFFSET r12, 3*8
	movq  %r14, 4*8(%rsp)
	CFI_REL_OFFSET r14, 4*8
	movq  %r13, 5*8(%rsp)
	CFI_REL_OFFSET r13, 5*8
	movq  %rbp, 6*8(%rsp)
	CFI_REL_OFFSET rbp, 6*8

	movq  %r8, (%rsp)
	movq  %r9, 1*8(%rsp)

	movl  %ecx, %eax
	movl  %edx, %ecx

	xorl  %r9d, %r9d
	movq  %rcx, %r12

	shrq  $6, %r12
	jz	.Lhandle_tail       /* < 64 */

	clc

	/* main loop. clear in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11:	temp3, rdx: temp4, r12 loopcnt */
	/* r10:	temp5, rbp: temp6, r14 temp7, r13 temp8 */
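
	/*
	 * Rough C equivalent of what the adcq chain below accumulates
	 * (a sketch only; assumes a compiler with unsigned __int128):
	 *
	 *	static inline unsigned long csum64_add(unsigned long sum,
	 *					       unsigned long v)
	 *	{
	 *		unsigned __int128 t = (unsigned __int128)sum + v;
	 *		// fold the end-around carry back in
	 *		return (unsigned long)t + (unsigned long)(t >> 64);
	 *	}
	 *
	 * except that the asm defers the end-around carries: each adcq
	 * feeds the previous carry into the next addition, and the final
	 * carry is folded in after the loop with %r9 == 0.
	 */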
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %rbp
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

	ignore 2f
	prefetcht0 5*64(%rdi)
2:
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %rbp, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %rbp, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

3:

	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax
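	/* %r9 is zero, so the adcq above merely folds the carry left by
	 * the last adcq of the loop back into the sum; decl, leaq and
	 * jnz do not modify CF, so it survives the loop edge.
	 */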

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx:	count */
	movl %ecx, %r10d
	andl $63, %ecx
	shrl $3, %ecx
	jz	.Lfold
	clc
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx
	shrq $32, %rax
	addl %ebx, %eax
	adcl %r9d, %eax
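	/* Worked example of the fold above, with illustrative values:
	 * %rax = 0x00000002ffffffff on entry gives %ebx = 0xffffffff,
	 * the shift leaves %rax = 2, addl yields 0x00000001 with CF set,
	 * and the final adcl (%r9d is zero) gives 0x00000002, i.e.
	 * low + high with the end-around carry added back in.
	 */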

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
	shrl $1, %ecx
	jz   .Lhandle_1
	movl $2, %edx
	xorl %ebx, %ebx
	clc
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz .Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testl $1, %r10d
	jz    .Lende
	xorl  %ebx, %ebx
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

	CFI_REMEMBER_STATE
.Lende:
	movq 2*8(%rsp), %rbx
	CFI_RESTORE rbx
	movq 3*8(%rsp), %r12
	CFI_RESTORE r12
	movq 4*8(%rsp), %r14
	CFI_RESTORE r14
	movq 5*8(%rsp), %r13
	CFI_RESTORE r13
	movq 6*8(%rsp), %rbp
	CFI_RESTORE rbp
	addq $7*8, %rsp
	CFI_ADJUST_CFA_OFFSET -7*8
	ret
	CFI_RESTORE_STATE

	/* Exception handlers. Very simple, zeroing is done in the wrappers */
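	/* (%rsp) and 8(%rsp) are src_err_ptr and dst_err_ptr, saved from
	 * %r8/%r9 in the prologue.  A NULL pointer means no error
	 * reporting for that side, hence the testq/jz checks below.
	 */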
.Lbad_source:
	movq (%rsp), %rax
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp  .Lende

.Lbad_dest:
	movq 8(%rsp), %rax
	testq %rax, %rax
	jz   .Lende
	movl $-EFAULT, (%rax)
	jmp .Lende
	CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)