 93ed397011
			
		
	
	
	93ed397011
	
	
	
		
			
			This declaration specifies the "function" type and size for various assembly functions, mainly needed for generating the correct branch instructions in Thumb-2. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
		
			
				
	
	
		
			332 lines
		
	
	
	
		
			6.7 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			332 lines
		
	
	
	
		
			6.7 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /*
 | |
|  *  linux/arch/arm/lib/csumpartialcopygeneric.S
 | |
|  *
 | |
|  *  Copyright (C) 1995-2001 Russell King
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or modify
 | |
|  * it under the terms of the GNU General Public License version 2 as
 | |
|  * published by the Free Software Foundation.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * unsigned int
 | |
|  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 | |
|  *  r0 = src, r1 = dst, r2 = len, r3 = sum
 | |
|  *  Returns : r0 = checksum
 | |
|  *
 | |
|  * Note that 'tst' and 'teq' preserve the carry flag.
 | |
|  */
 | |
| 
 | |
| src	.req	r0		@ r0: source pointer
 | |
| dst	.req	r1		@ r1: destination pointer
 | |
| len	.req	r2		@ r2: byte count
 | |
| sum	.req	r3		@ r3: running checksum
 | |
| 
 | |
| .Lzero:		mov	r0, sum			@ taken from .Lless8 when len == 0: result is the incoming sum
 | |
| 		load_regs			@ defined outside this file section; presumably restores registers and returns
 | |
| 
 | |
| 		/*
 | |
| 		 * Align an unaligned destination pointer.  We know that
 | |
| 		 * we have >= 8 bytes here, so we don't need to check
 | |
| 		 * the length.  Note that the source pointer hasn't been
 | |
| 		 * aligned yet.
 | |
| 		 *
 | |
| 		 * Reached with "blne .Ldst_unaligned" from the function
 | |
| 		 * entry and left by writing lr to pc.  Copies one to
 | |
| 		 * three bytes, a byte at a time, and hands the checksum
 | |
| 		 * carry back to the caller in C.
 | |
| 		 */
 | |
| .Ldst_unaligned:
 | |
| 		tst	dst, #1			@ is dst on an odd address?
 | |
| 		beq	.Ldst_16bit		@ no: one two-byte step will align it
 | |
| 
 | |
| 		load1b	ip			@ defined elsewhere; presumably ip = next source byte
 | |
| 		sub	len, len, #1		@ no S suffix, so C is left alone
 | |
| 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 | |
| 		strb	ip, [dst], #1		@ store the byte, dst += 1
 | |
| 		tst	dst, #2			@ Z set when dst is now a multiple of 4
 | |
| 		moveq	pc, lr			@ dst is now 32bit aligned
 | |
| 
 | |
| .Ldst_16bit:	load2b	r8, ip		@ defined elsewhere; presumably r8, ip = next two source bytes
 | |
| 		sub	len, len, #2		@ again flag-neutral
 | |
| 		adcs	sum, sum, r8, put_byte_0
 | |
| 		strb	r8, [dst], #1
 | |
| 		adcs	sum, sum, ip, put_byte_1
 | |
| 		strb	ip, [dst], #1
 | |
| 		mov	pc, lr			@ dst is now 32bit aligned
 | |
| 
 | |
| 		/*
 | |
| 		 * Handle 0 to 7 bytes, with any alignment of source and
 | |
| 		 * destination pointers.  Note that when we get here, C = 0
 | |
| 		 *
 | |
| 		 * (The visible entry is "blo .Lless8" after "cmp len, #8",
 | |
| 		 * and blo is only taken with the carry clear.)  Data is
 | |
| 		 * moved a byte at a time; control then joins .Ldone, or
 | |
| 		 * .Lzero when len is zero.
 | |
| 		 */
 | |
| .Lless8:	teq	len, #0			@ check for zero count
 | |
| 		beq	.Lzero
 | |
| 
 | |
| 		/* we must have at least one byte. */
 | |
| 		tst	dst, #1			@ dst 16-bit aligned
 | |
| 		beq	.Lless8_aligned
 | |
| 
 | |
| 		/* Align dst */
 | |
| 		load1b	ip			@ defined elsewhere; presumably ip = next source byte
 | |
| 		sub	len, len, #1		@ no S suffix, so C is left alone
 | |
| 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 | |
| 		strb	ip, [dst], #1
 | |
| 		tst	len, #6			@ at least two bytes left? (len < 8 here)
 | |
| 		beq	.Lless8_byteonly
 | |
| 
 | |
| 1:		load2b	r8, ip			@ copy and sum two bytes per pass
 | |
| 		sub	len, len, #2
 | |
| 		adcs	sum, sum, r8, put_byte_0
 | |
| 		strb	r8, [dst], #1
 | |
| 		adcs	sum, sum, ip, put_byte_1
 | |
| 		strb	ip, [dst], #1
 | |
| .Lless8_aligned:
 | |
| 		tst	len, #6			@ loop while two or more bytes remain
 | |
| 		bne	1b
 | |
| .Lless8_byteonly:
 | |
| 		tst	len, #1			@ one odd byte left over?
 | |
| 		beq	.Ldone
 | |
| 		load1b	r8
 | |
| 		adcs	sum, sum, r8, put_byte_0	@ update checksum
 | |
| 		strb	r8, [dst], #1
 | |
| 		b	.Ldone
 | |
| 
 | |
| FN_ENTRY
 | |
| 		save_regs			@ defined elsewhere; presumably saves registers, with dst ending up at [sp, #0] (see .Ldone)
 | |
| 
 | |
| 		cmp	len, #8			@ Ensure that we have at least
 | |
| 		blo	.Lless8			@ 8 bytes to copy.
 | |
| 
 | |
| 		adds	sum, sum, #0		@ C = 0
 | |
| 		tst	dst, #3			@ Test destination alignment
 | |
| 		blne	.Ldst_unaligned		@ align destination, return here
 | |
| 
 | |
| 		/*
 | |
| 		 * Ok, the dst pointer is now 32bit aligned, and we know
 | |
| 		 * that we must have more than 4 bytes to copy.  Note
 | |
| 		 * that C contains the carry from the dst alignment above.
 | |
| 		 *
 | |
| 		 * From here on the checksum carry lives in C between
 | |
| 		 * adcs instructions; the bics/ands/tst/teq/sub/mov
 | |
| 		 * instructions placed between them are relied on to
 | |
| 		 * leave C untouched.
 | |
| 		 */
 | |
| 
 | |
| 		tst	src, #3			@ Test source alignment
 | |
| 		bne	.Lsrc_not_aligned
 | |
| 
 | |
| 		/* Routine for src & dst aligned */
 | |
| 
 | |
| 		bics	ip, len, #15		@ ip = len rounded down to a multiple of 16
 | |
| 		beq	2f			@ no complete 16-byte chunk
 | |
| 
 | |
| 1:		load4l	r4, r5, r6, r7		@ defined elsewhere; presumably four words from src
 | |
| 		stmia	dst!, {r4, r5, r6, r7}	@ store them, dst += 16
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		adcs	sum, sum, r6
 | |
| 		adcs	sum, sum, r7
 | |
| 		sub	ip, ip, #16		@ no S suffix, so C is left alone
 | |
| 		teq	ip, #0			@ teq keeps C for the next adcs
 | |
| 		bne	1b
 | |
| 
 | |
| 2:		ands	ip, len, #12		@ whole words left: bit 3 = two words, bit 2 = one word
 | |
| 		beq	4f
 | |
| 		tst	ip, #8
 | |
| 		beq	3f
 | |
| 		load2l	r4, r5			@ defined elsewhere; presumably two words from src
 | |
| 		stmia	dst!, {r4, r5}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		tst	ip, #4
 | |
| 		beq	4f
 | |
| 
 | |
| 3:		load1l	r4			@ defined elsewhere; presumably one word from src
 | |
| 		str	r4, [dst], #4
 | |
| 		adcs	sum, sum, r4
 | |
| 
 | |
| 4:		ands	len, len, #3		@ 1 to 3 trailing bytes?
 | |
| 		beq	.Ldone
 | |
| 		load1l	r4			@ NOTE(review): looks like a whole-word read for a 1-3 byte tail; confirm against load1l
 | |
| 		tst	len, #2
 | |
| 		mov	r5, r4, get_byte_0	@ presumably r5 = first tail byte; mov keeps the flags from tst
 | |
| 		beq	.Lexit			@ fewer than two bytes: only the odd byte remains
 | |
| 		adcs	sum, sum, r4, push #16	@ sum the two-byte part of the tail (push is defined elsewhere)
 | |
| 		strb	r5, [dst], #1
 | |
| 		mov	r5, r4, get_byte_1
 | |
| 		strb	r5, [dst], #1
 | |
| 		mov	r5, r4, get_byte_2	@ candidate final byte for .Lexit
 | |
| .Lexit:		tst	len, #1			@ shared tail: r5 carries the last byte when len is odd
 | |
| 		strneb	r5, [dst], #1
 | |
| 		andne	r5, r5, #255		@ keep only the low byte for the checksum
 | |
| 		adcnes	sum, sum, r5, put_byte_0
 | |
| 
 | |
| 		/*
 | |
| 		 * If the dst pointer was not 16-bit aligned, we
 | |
| 		 * need to rotate the checksum here to get around
 | |
| 		 * the inefficient byte manipulations in the
 | |
| 		 * architecture independent code.
 | |
| 		 */
 | |
| .Ldone:		adc	r0, sum, #0		@ fold the outstanding carry into the result
 | |
| 		ldr	sum, [sp, #0]		@ dst
 | |
| 		tst	sum, #1			@ was that dst value odd?
 | |
| 		movne	r0, r0, ror #8		@ yes: rotate the result by one byte
 | |
| 		load_regs			@ defined elsewhere; presumably restores registers and returns
 | |
| 
 | |
| .Lsrc_not_aligned:
 | |
| 		adc	sum, sum, #0		@ include C from dst alignment
 | |
| 		and	ip, src, #3		@ ip = src offset inside its word (1, 2 or 3 here)
 | |
| 		bic	src, src, #3		@ round src down to a word boundary
 | |
| 		load1l	r5			@ defined elsewhere; presumably the aligned word holding the first source bytes
 | |
| 		cmp	ip, #2
 | |
| 		beq	.Lsrc2_aligned		@ offset 2
 | |
| 		bhi	.Lsrc3_aligned		@ offset 3; offset 1 falls through
 | |
| 		mov	r4, r5, pull #8		@ C = 0 (cmp ip, #2 with ip = 1 borrows); r4 = leftover bytes of r5
 | |
| 		bics	ip, len, #15		@ ip = len rounded down to a multiple of 16
 | |
| 		beq	2f
 | |
| 1:		load4l	r5, r6, r7, r8		@ each output word = leftover in r4 merged with the start of the next word
 | |
| 		orr	r4, r4, r5, push #24
 | |
| 		mov	r5, r5, pull #8
 | |
| 		orr	r5, r5, r6, push #24
 | |
| 		mov	r6, r6, pull #8
 | |
| 		orr	r6, r6, r7, push #24
 | |
| 		mov	r7, r7, pull #8
 | |
| 		orr	r7, r7, r8, push #24
 | |
| 		stmia	dst!, {r4, r5, r6, r7}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		adcs	sum, sum, r6
 | |
| 		adcs	sum, sum, r7
 | |
| 		mov	r4, r8, pull #8		@ carry the unused part of r8 into the next pass
 | |
| 		sub	ip, ip, #16
 | |
| 		teq	ip, #0			@ teq keeps C for the next adcs
 | |
| 		bne	1b
 | |
| 2:		ands	ip, len, #12		@ whole words left: bit 3 = two words, bit 2 = one word
 | |
| 		beq	4f
 | |
| 		tst	ip, #8
 | |
| 		beq	3f
 | |
| 		load2l	r5, r6
 | |
| 		orr	r4, r4, r5, push #24
 | |
| 		mov	r5, r5, pull #8
 | |
| 		orr	r5, r5, r6, push #24
 | |
| 		stmia	dst!, {r4, r5}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		mov	r4, r6, pull #8
 | |
| 		tst	ip, #4
 | |
| 		beq	4f
 | |
| 3:		load1l	r5
 | |
| 		orr	r4, r4, r5, push #24
 | |
| 		str	r4, [dst], #4
 | |
| 		adcs	sum, sum, r4
 | |
| 		mov	r4, r5, pull #8
 | |
| 4:		ands	len, len, #3		@ 1 to 3 trailing bytes?
 | |
| 		beq	.Ldone
 | |
| 		mov	r5, r4, get_byte_0	@ tail bytes are taken from the leftover in r4, no further load
 | |
| 		tst	len, #2
 | |
| 		beq	.Lexit			@ only the odd byte remains
 | |
| 		adcs	sum, sum, r4, push #16	@ sum the two-byte part of the tail (push is defined elsewhere)
 | |
| 		strb	r5, [dst], #1
 | |
| 		mov	r5, r4, get_byte_1
 | |
| 		strb	r5, [dst], #1
 | |
| 		mov	r5, r4, get_byte_2	@ candidate final byte for .Lexit
 | |
| 		b	.Lexit
 | |
| 
 | |
| .Lsrc2_aligned:	mov	r4, r5, pull #16	@ source offset 2: r4 = leftover half of the first word
 | |
| 		adds	sum, sum, #0		@ C = 0 again (cmp ip, #2 set it on equality)
 | |
| 		bics	ip, len, #15		@ ip = len rounded down to a multiple of 16
 | |
| 		beq	2f
 | |
| 1:		load4l	r5, r6, r7, r8		@ each output word = leftover in r4 merged with the start of the next word
 | |
| 		orr	r4, r4, r5, push #16
 | |
| 		mov	r5, r5, pull #16
 | |
| 		orr	r5, r5, r6, push #16
 | |
| 		mov	r6, r6, pull #16
 | |
| 		orr	r6, r6, r7, push #16
 | |
| 		mov	r7, r7, pull #16
 | |
| 		orr	r7, r7, r8, push #16
 | |
| 		stmia	dst!, {r4, r5, r6, r7}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		adcs	sum, sum, r6
 | |
| 		adcs	sum, sum, r7
 | |
| 		mov	r4, r8, pull #16	@ carry the unused part of r8 into the next pass
 | |
| 		sub	ip, ip, #16
 | |
| 		teq	ip, #0			@ teq keeps C for the next adcs
 | |
| 		bne	1b
 | |
| 2:		ands	ip, len, #12		@ whole words left: bit 3 = two words, bit 2 = one word
 | |
| 		beq	4f
 | |
| 		tst	ip, #8
 | |
| 		beq	3f
 | |
| 		load2l	r5, r6
 | |
| 		orr	r4, r4, r5, push #16
 | |
| 		mov	r5, r5, pull #16
 | |
| 		orr	r5, r5, r6, push #16
 | |
| 		stmia	dst!, {r4, r5}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		mov	r4, r6, pull #16
 | |
| 		tst	ip, #4
 | |
| 		beq	4f
 | |
| 3:		load1l	r5
 | |
| 		orr	r4, r4, r5, push #16
 | |
| 		str	r4, [dst], #4
 | |
| 		adcs	sum, sum, r4
 | |
| 		mov	r4, r5, pull #16
 | |
| 4:		ands	len, len, #3		@ 1 to 3 trailing bytes?
 | |
| 		beq	.Ldone
 | |
| 		mov	r5, r4, get_byte_0	@ first tail byte comes from the leftover in r4
 | |
| 		tst	len, #2
 | |
| 		beq	.Lexit			@ only the odd byte remains
 | |
| 		adcs	sum, sum, r4		@ r4 is the leftover from the last pull #16; add it whole
 | |
| 		strb	r5, [dst], #1
 | |
| 		mov	r5, r4, get_byte_1
 | |
| 		strb	r5, [dst], #1
 | |
| 		tst	len, #1			@ is there a third tail byte?
 | |
| 		beq	.Ldone
 | |
| 		load1b	r5			@ not held in r4 any more; presumably fetched from src
 | |
| 		b	.Lexit
 | |
| 
 | |
| .Lsrc3_aligned:	mov	r4, r5, pull #24	@ source offset 3: r4 = leftover part of the first word
 | |
| 		adds	sum, sum, #0		@ C = 0 again (cmp ip, #2 with ip = 3 set it)
 | |
| 		bics	ip, len, #15		@ ip = len rounded down to a multiple of 16
 | |
| 		beq	2f
 | |
| 1:		load4l	r5, r6, r7, r8		@ each output word = leftover in r4 merged with the start of the next word
 | |
| 		orr	r4, r4, r5, push #8
 | |
| 		mov	r5, r5, pull #24
 | |
| 		orr	r5, r5, r6, push #8
 | |
| 		mov	r6, r6, pull #24
 | |
| 		orr	r6, r6, r7, push #8
 | |
| 		mov	r7, r7, pull #24
 | |
| 		orr	r7, r7, r8, push #8
 | |
| 		stmia	dst!, {r4, r5, r6, r7}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		adcs	sum, sum, r6
 | |
| 		adcs	sum, sum, r7
 | |
| 		mov	r4, r8, pull #24	@ carry the unused part of r8 into the next pass
 | |
| 		sub	ip, ip, #16
 | |
| 		teq	ip, #0			@ teq keeps C for the next adcs
 | |
| 		bne	1b
 | |
| 2:		ands	ip, len, #12		@ whole words left: bit 3 = two words, bit 2 = one word
 | |
| 		beq	4f
 | |
| 		tst	ip, #8
 | |
| 		beq	3f
 | |
| 		load2l	r5, r6
 | |
| 		orr	r4, r4, r5, push #8
 | |
| 		mov	r5, r5, pull #24
 | |
| 		orr	r5, r5, r6, push #8
 | |
| 		stmia	dst!, {r4, r5}
 | |
| 		adcs	sum, sum, r4
 | |
| 		adcs	sum, sum, r5
 | |
| 		mov	r4, r6, pull #24
 | |
| 		tst	ip, #4
 | |
| 		beq	4f
 | |
| 3:		load1l	r5
 | |
| 		orr	r4, r4, r5, push #8
 | |
| 		str	r4, [dst], #4
 | |
| 		adcs	sum, sum, r4
 | |
| 		mov	r4, r5, pull #24
 | |
| 4:		ands	len, len, #3		@ 1 to 3 trailing bytes?
 | |
| 		beq	.Ldone
 | |
| 		mov	r5, r4, get_byte_0	@ first tail byte comes from the leftover in r4
 | |
| 		tst	len, #2
 | |
| 		beq	.Lexit			@ only the odd byte remains
 | |
| 		strb	r5, [dst], #1
 | |
| 		adcs	sum, sum, r4		@ r4 is the leftover from the last pull #24; add it whole
 | |
| 		load1l	r4			@ leftover used up; presumably reads the next source word
 | |
| 		mov	r5, r4, get_byte_0
 | |
| 		strb	r5, [dst], #1
 | |
| 		adcs	sum, sum, r4, push #24	@ NOTE(review): presumably adds just one byte of the new word; confirm against push
 | |
| 		mov	r5, r4, get_byte_1	@ candidate final byte for .Lexit
 | |
| 		b	.Lexit
 | |
| FN_EXIT
 |