420 lines
		
	
	
	
		
			7.5 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			420 lines
		
	
	
	
		
			7.5 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
|   | ;
 | ||
|  | ;  linux/arch/c6x/lib/csum_64plus.s
 | ||
|  | ;
 | ||
|  | ;  Port to the Texas Instruments TMS320C6x architecture
 | ||
|  | ;
 | ||
|  | ;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
 | ||
|  | ;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
 | ||
|  | ;
 | ||
|  | ;  This program is free software; you can redistribute it and/or modify
 | ||
|  | ;  it under the terms of the GNU General Public License version 2 as
 | ||
|  | ;  published by the Free Software Foundation.
 | ||
|  | ;
 | ||
|  | #include <linux/linkage.h> | ||
|  | 
 | ||
|  | ;
 | ||
|  | ;unsigned int csum_partial_copy(const char *src, char * dst,
 | ||
|  | ;				int len, int sum)
 | ||
|  | ;
 | ||
|  | ; A4:	src
 | ||
|  | ; B4:	dst
 | ||
|  | ; A6:	len
 | ||
|  | ; B6:	sum
 | ||
|  | ; return csum in A4
 | ||
|  | ;
 | ||
|  | 
 | ||
|  | 	.text | ||
;; csum_partial_copy: copy len (A6) bytes from src (A4) to dst (B4) while
;; summing the copied data as 16-bit quantities in A9 (and B9).  The sum is
;; folded until it fits in 16 bits, the caller's initial sum (B6, kept in
;; A31) is added, and the result is returned in A4.  ILC is saved in B30 on
;; entry and restored on exit.
;; NOTE(review): instruction order here depends on C6x branch/load delay
;; slots and SPLOOP scheduling -- check the CPU reference before reordering.
|  | ENTRY(csum_partial_copy) | ||
|  | 	MVC	.S2	ILC,B30 | ||
|  | 
 | ||
|  | 	MV	.D1X	B6,A31		; given csum
 | ||
|  | 	ZERO	.D1	A9		; csum (a side)
 | ||
|  | ||	ZERO	.D2	B9		; csum (b side)
 | ||
|  | ||	SHRU	.S2X	A6,2,B5		; len / 4
 | ||
|  | 
 | ||
|  | 	;; Check alignment and size
 | ||
|  | 	AND	.S1	3,A4,A1 | ||
|  | ||	AND	.S2	3,B4,B0 | ||
|  | 	OR	.L2X	B0,A1,B0	; non aligned condition
 | ||
|  | ||	MVC	.S2	B5,ILC | ||
|  | ||	MVK	.D2	1,B2 | ||
|  | ||	MV	.D1X	B5,A1		; words condition
 | ||
;; A1 == 0: no whole 32-bit word to copy, go to the tail handling at L8.
;; B0 != 0: src or dst is not 4-byte aligned, use the LDNW/STNW loop at L6.
|  |   [!A1]	B	.S1	L8 | ||
|  |    [B0] BNOP	.S1	L6,5 | ||
|  | 
 | ||
|  | 	SPLOOP		1 | ||
|  | 
 | ||
|  | 	;; Main loop for aligned words
 | ||
;; Per word: A16 = upper half-word of the loaded word (EXTU 0,16), added to
;; A9; B8 = B7 * B2 with B2 = 1, added to B9 (presumably the low half-word,
;; assuming MPYU multiplies the low 16 bits -- confirm against the ISA).
|  | 	LDW	.D1T1	*A4++,A7 | ||
|  | 	NOP	4 | ||
|  | 	MV	.S2X	A7,B7 | ||
|  | ||	EXTU	.S1	A7,0,16,A16 | ||
|  | 	STW	.D2T2	B7,*B4++ | ||
|  | ||	MPYU	.M2	B7,B2,B8 | ||
|  | ||	ADD	.L1	A16,A9,A9 | ||
|  | 	NOP | ||
|  | 	SPKERNEL	8,0 | ||
|  | ||	ADD	.L2	B8,B9,B9 | ||
|  | 
 | ||
;; A1 is cleared so that the [!A1] branch at L6 skips the non-aligned loop.
|  | 	ZERO	.D1	A1 | ||
|  | ||	ADD	.L1X	A9,B9,A9	;  add csum from a and b sides
 | ||
|  | 
 | ||
|  | L6: | ||
|  |   [!A1]	BNOP	.S1	L8,5 | ||
|  | 
 | ||
|  | 	;; Main loop for non-aligned words
 | ||
;; Same summation as the aligned loop, but both halves are accumulated in A9
;; and the multiplier operand is A2 = 1.
|  | 	SPLOOP		2 | ||
|  |  ||	MVK	.L1	1,A2 | ||
|  | 
 | ||
|  | 	LDNW	.D1T1	*A4++,A7 | ||
|  | 	NOP		3 | ||
|  | 
 | ||
|  | 	NOP | ||
|  | 	MV	.S2X	A7,B7 | ||
|  |  ||	EXTU	.S1	A7,0,16,A16 | ||
|  |  ||	MPYU	.M1	A7,A2,A8 | ||
|  | 
 | ||
|  | 	ADD	.L1	A16,A9,A9 | ||
|  | 	SPKERNEL	6,0 | ||
|  |  ||	STNW	.D2T2	B7,*B4++ | ||
|  |  ||	ADD	.L1	A8,A9,A9 | ||
|  | 
 | ||
;; Tail: bit 1 of len set means two more bytes remain before the last byte.
|  | L8:	AND	.S2X	2,A6,B5 | ||
|  | 	CMPGT	.L2	B5,0,B0 | ||
|  |   [!B0]	BNOP	.S1	L82,4 | ||
|  | 
 | ||
|  | 	;; Handle a trailing half-word
 | ||
|  | 	ZERO	.L1	A7 | ||
|  | ||	ZERO	.D1	A8 | ||
|  | 
 | ||
|  | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
|  | 
 | ||
;; Big endian: the first byte is the high byte of the half-word (A7 << 8).
|  | 	LDBU	.D1T1	*A4++,A7 | ||
|  | 	LDBU	.D1T1	*A4++,A8 | ||
|  | 	NOP		3 | ||
|  | 	SHL	.S1	A7,8,A0 | ||
|  | 	ADD	.S1	A8,A9,A9 | ||
|  | 	STB	.D2T1	A7,*B4++ | ||
|  | ||	ADD	.S1	A0,A9,A9 | ||
|  | 	STB	.D2T1	A8,*B4++ | ||
|  | 
 | ||
|  | #else | ||
|  | 
 | ||
;; Little endian: the second byte is the high byte of the half-word (A8 << 8).
|  | 	LDBU	.D1T1	*A4++,A7 | ||
|  | 	LDBU	.D1T1	*A4++,A8 | ||
|  | 	NOP		3 | ||
|  | 	ADD	.S1	A7,A9,A9 | ||
|  | 	SHL	.S1	A8,8,A0 | ||
|  | 
 | ||
|  | 	STB	.D2T1	A7,*B4++ | ||
|  | ||	ADD	.S1	A0,A9,A9 | ||
|  | 	STB	.D2T1	A8,*B4++ | ||
|  | 
 | ||
|  | #endif | ||
|  | 
 | ||
|  | 	;; Handle the trailing byte, if any
 | ||
|  | L82:	AND	.S2X	1,A6,B0 | ||
|  |   [!B0]	BNOP	.S1	L9,5 | ||
|  | 
 | ||
|  | ||	ZERO	.L1	A7 | ||
|  | 
 | ||
|  | L83:	LDBU	.D1T1	*A4++,A7 | ||
|  | 	NOP		4 | ||
|  | 
 | ||
|  | 	MV	.L2X	A7,B7 | ||
|  | 
 | ||
|  | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
|  | 
 | ||
;; Big endian: the last byte is added as the high byte (shifted left by 8).
|  | 	STB	.D2T2	B7,*B4++ | ||
|  | ||	SHL	.S1	A7,8,A7 | ||
|  | 	ADD	.S1	A7,A9,A9 | ||
|  | 
 | ||
|  | #else | ||
|  | 
 | ||
|  | 	STB	.D2T2	B7,*B4++ | ||
|  | ||	ADD	.S1	A7,A9,A9 | ||
|  | 
 | ||
|  | #endif | ||
|  | 
 | ||
|  | 	;; Fold the csum
 | ||
;; Skip the fold when the upper 16 bits of A9 are already zero; otherwise
;; repeat A9 = (A9 & 0xffff) + (A9 >> 16) until they are.
|  | L9:	SHRU	.S2X	A9,16,B0 | ||
|  |   [!B0]	BNOP	.S1	L10,5 | ||
|  | 
 | ||
|  | L91:	SHRU	.S2X	A9,16,B4 | ||
|  | ||	EXTU	.S1	A9,16,16,A3 | ||
|  | 	ADD	.D1X	A3,B4,A9 | ||
|  | 
 | ||
|  | 	SHRU	.S1	A9,16,A0 | ||
|  |    [A0]	BNOP	.S1	L91,5 | ||
|  | 
 | ||
;; Add the caller's initial sum (saved in A31) and return the total in A4.
;; The ILC restore follows the return branch, presumably executing in its
;; last delay slot -- confirm against the ISA.
|  | L10:	ADD	.D1	A31,A9,A9 | ||
|  | 	MV	.D1	A9,A4 | ||
|  | 
 | ||
|  | 	BNOP	.S2	B3,4 | ||
|  | 	MVC	.S2	B30,ILC | ||
|  | ENDPROC(csum_partial_copy) | ||
|  | 
 | ||
|  | ;
 | ||
|  | ;unsigned short
 | ||
|  | ;ip_fast_csum(unsigned char *iph, unsigned int ihl)
 | ||
|  | ;{
 | ||
|  | ;	unsigned int checksum = 0;
 | ||
|  | ;	unsigned short *tosum = (unsigned short *) iph;
 | ||
|  | ;	int len;
 | ||
|  | ;
 | ||
|  | ;	len = ihl*4;
 | ||
|  | ;
 | ||
|  | ;	if (len <= 0)
 | ||
|  | ;		return 0;
 | ||
|  | ;
 | ||
|  | ;	while(len) {
 | ||
|  | ;		len -= 2;
 | ||
|  | ;		checksum += *tosum++;
 | ||
|  | ;	}
 | ||
|  | ;	if (len & 1)
 | ||
|  | ;		checksum += *(unsigned char*) tosum;
 | ||
|  | ;
 | ||
|  | ;	while(checksum >> 16)
 | ||
|  | ;		checksum = (checksum & 0xffff) + (checksum >> 16);
 | ||
|  | ;
 | ||
|  | ;	return ~checksum;
 | ||
|  | ;}
 | ||
|  | ;
 | ||
|  | ; A4:	iph
 | ||
|  | ; B4:	ihl
 | ||
|  | ; return checksum in A4
 | ||
|  | ;
 | ||
|  | 	.text | ||
|  | 
 | ||
;; ip_fast_csum: sum ihl (B4) 32-bit words starting at iph (A4) as unsigned
;; half-words into A5, fold the sum to 16 bits, complement it and return the
;; low 16 bits in A4.  Returns 0 when ihl * 4 is not greater than zero.
;; ILC is saved in B30 on entry and restored on exit.
|  | ENTRY(ip_fast_csum) | ||
|  | 	ZERO	.D1	A5 | ||
|  |  ||	MVC	.S2	ILC,B30 | ||
;; B0 = ihl * 4 (length in bytes); B1 = (B0 > 0).
|  | 	SHL	.S2	B4,2,B0 | ||
|  | 	CMPGT	.L2	B0,0,B1 | ||
;; Length not positive: leave through L15 with A3 = 0 as the return value.
|  |   [!B1] BNOP	.S1	L15,4 | ||
|  |   [!B1]	ZERO	.D1	A3 | ||
|  | 
 | ||
;; B0 is halved to the number of half-words and loaded into ILC as the
;; loop count.
|  |   [!B0]	B	.S1	L12 | ||
|  | 	SHRU	.S2	B0,1,B0 | ||
|  | 	MVC	.S2	B0,ILC | ||
|  | 	NOP	3 | ||
|  | 
 | ||
;; Pipelined loop: load one unsigned half-word and add it to A5.
|  | 	SPLOOP	1 | ||
|  | 	LDHU	.D1T1	*A4++,A3 | ||
|  | 	NOP	3 | ||
|  | 	NOP | ||
|  | 	SPKERNEL	5,0 | ||
|  |  ||	ADD	.L1	A3,A5,A5 | ||
|  | 
 | ||
;; Fold: while the upper 16 bits of A5 are non-zero,
;; A5 = (A5 & 0xffff) + (A5 >> 16).
|  | L12:	SHRU	.S1	A5,16,A0 | ||
|  |   [!A0]	BNOP	.S1	L14,5 | ||
|  | 
 | ||
|  | L13:	SHRU	.S2X	A5,16,B4 | ||
|  | 	EXTU	.S1	A5,16,16,A3 | ||
|  | 	ADD	.D1X	A3,B4,A5 | ||
|  | 	SHRU	.S1	A5,16,A0 | ||
|  |   [A0]	BNOP	.S1	L13,5 | ||
|  | 
 | ||
;; Complement the folded sum and keep only its low 16 bits.
|  | L14:	NOT	.D1	A5,A3 | ||
|  | 	EXTU	.S1	A3,16,16,A3 | ||
|  | 
 | ||
;; Return through B3; the ILC restore and the move of the result into A4
;; follow the branch, presumably in its delay slots -- confirm against the ISA.
|  | L15:	BNOP	.S2	B3,3 | ||
|  | 	MVC	.S2	B30,ILC | ||
|  | 	MV	.D1	A3,A4 | ||
|  | ENDPROC(ip_fast_csum) | ||
|  | 
 | ||
|  | ;
 | ||
|  | ;unsigned short
 | ||
|  | ;do_csum(unsigned char *buff, unsigned int len)
 | ||
|  | ;{
 | ||
|  | ;	int odd, count;
 | ||
|  | ;	unsigned int result = 0;
 | ||
|  | ;
 | ||
|  | ;	if (len <= 0)
 | ||
|  | ;		goto out;
 | ||
|  | ;	odd = 1 & (unsigned long) buff;
 | ||
|  | ;	if (odd) {
 | ||
|  | ;#ifdef __LITTLE_ENDIAN
 | ||
|  | ;		result += (*buff << 8);
 | ||
|  | ;#else
 | ||
|  | ;		result = *buff;
 | ||
|  | ;#endif
 | ||
|  | ;		len--;
 | ||
|  | ;		buff++;
 | ||
|  | ;	}
 | ||
|  | ;	count = len >> 1;		/* nr of 16-bit words.. */
 | ||
|  | ;	if (count) {
 | ||
|  | ;		if (2 & (unsigned long) buff) {
 | ||
|  | ;			result += *(unsigned short *) buff;
 | ||
|  | ;			count--;
 | ||
|  | ;			len -= 2;
 | ||
|  | ;			buff += 2;
 | ||
|  | ;		}
 | ||
|  | ;		count >>= 1;		/* nr of 32-bit words.. */
 | ||
|  | ;		if (count) {
 | ||
|  | ;			unsigned int carry = 0;
 | ||
|  | ;			do {
 | ||
|  | ;				unsigned int w = *(unsigned int *) buff;
 | ||
|  | ;				count--;
 | ||
|  | ;				buff += 4;
 | ||
|  | ;				result += carry;
 | ||
|  | ;				result += w;
 | ||
|  | ;				carry = (w > result);
 | ||
|  | ;			} while (count);
 | ||
|  | ;			result += carry;
 | ||
|  | ;			result = (result & 0xffff) + (result >> 16);
 | ||
|  | ;		}
 | ||
|  | ;		if (len & 2) {
 | ||
|  | ;			result += *(unsigned short *) buff;
 | ||
|  | ;			buff += 2;
 | ||
|  | ;		}
 | ||
|  | ;	}
 | ||
|  | ;	if (len & 1)
 | ||
|  | ;#ifdef __LITTLE_ENDIAN
 | ||
|  | ;		result += *buff;
 | ||
|  | ;#else
 | ||
|  | ;		result += (*buff << 8);
 | ||
|  | ;#endif
 | ||
|  | ;	result = (result & 0xffff) + (result >> 16);
 | ||
|  | ;	/* add up carry.. */
 | ||
|  | ;	result = (result & 0xffff) + (result >> 16);
 | ||
|  | ;	if (odd)
 | ||
|  | ;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
 | ||
|  | ;out:
 | ||
|  | ;	return result;
 | ||
|  | ;}
 | ||
|  | ;
 | ||
|  | ; A4:	buff
 | ||
|  | ; B4:	len
 | ||
|  | ; return checksum in A4
 | ||
|  | ;
 | ||
|  | 
 | ||
;; do_csum: assembly counterpart of the reference C routine in the comment
;; block that precedes it.  Sums len (B4) bytes at buff (A4) and returns the
;; sum folded to 16 bits in A4; the two result bytes are swapped when buff
;; started on an odd address.
;; Register use: A1 = running result, A3 = "odd" flag, A5 = saved return
;; address (copied from B3), B3 = remaining length, B0 = word counts / tests.
;; NOTE(review): several instructions sit right after branches and appear to
;; rely on C6x branch delay slots -- check the CPU reference before reordering.
|  | ENTRY(do_csum) | ||
|  | 	   CMPGT   .L2	   B4,0,B0 | ||
;; len <= 0: leave through L26.  The instructions up to the ZERO of A1 come
;; after this branch, presumably in its delay slots, so A5 and A1 = 0 would
;; be set before L26 is reached -- confirm against the ISA.
|  |    [!B0]   BNOP    .S1	   L26,3 | ||
|  | 	   EXTU    .S1	   A4,31,31,A0 | ||
|  | 
 | ||
|  | 	   MV	   .L1	   A0,A3 | ||
|  | ||	   MV	   .S1X    B3,A5 | ||
|  | ||	   MV	   .L2	   B4,B3 | ||
|  | ||	   ZERO    .D1	   A1 | ||
|  | 
 | ||
;; Odd start address (A0 = buff & 1): consume one leading byte.
;; Big endian: result = byte.  Little endian: result = byte << 8.
|  | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
|  |    [A0]    SUB	   .L2	   B3,1,B3 | ||
|  | || [A0]    LDBU    .D1T1   *A4++,A1 | ||
|  | #else | ||
|  |    [!A0]   BNOP    .S1	   L21,5 | ||
|  | || [A0]    LDBU    .D1T1   *A4++,A0 | ||
|  | 	   SUB	   .L2	   B3,1,B3 | ||
|  | ||	   SHL	   .S1	   A0,8,A1 | ||
|  | L21: | ||
|  | #endif | ||
;; B0 = remaining length / 2 (number of 16-bit words); none -> L24.
;; A0 = buff & 2 selects whether a leading half-word is needed before the
;; 32-bit word loop.
|  | 	   SHR	   .S2	   B3,1,B0 | ||
|  |    [!B0]   BNOP    .S1	   L24,3 | ||
|  | 	   MVK	   .L1	   2,A0 | ||
|  | 	   AND	   .L1	   A4,A0,A0 | ||
|  | 
 | ||
;; Leading half-word: add it to the result, count -= 1, length -= 2.
|  |    [!A0]   BNOP    .S1	   L22,5 | ||
|  | || [A0]    LDHU    .D1T1   *A4++,A0 | ||
|  | 	   SUB	   .L2	   B0,1,B0 | ||
|  | ||	   SUB	   .S2	   B3,2,B3 | ||
|  | ||	   ADD	   .L1	   A0,A1,A1 | ||
|  | L22: | ||
;; B0 = number of 32-bit words; A0 = carry, initially 0.
|  | 	   SHR	   .S2	   B0,1,B0 | ||
|  | ||	   ZERO    .L1	   A0 | ||
|  | 
 | ||
|  |    [!B0]   BNOP    .S1	   L23,5 | ||
|  | || [B0]    MVC	   .S2	   B0,ILC | ||
|  | 
 | ||
;; Word loop: A2 accumulates (seeded from A1), A1 holds the loaded word w,
;; A0 = carry + w is added to A2, then A0 = (w > A2) becomes the next carry.
|  | 	   SPLOOP  3 | ||
|  | 	   SPMASK  L1 | ||
|  | ||	   MV	   .L1	   A1,A2 | ||
|  | ||	   LDW	   .D1T1   *A4++,A1 | ||
|  | 
 | ||
|  | 	   NOP	   4 | ||
|  | 	   ADD	   .L1	   A0,A1,A0 | ||
|  | 	   ADD	   .L1	   A2,A0,A2 | ||
|  | 
 | ||
|  | 	   SPKERNEL 1,2 | ||
|  | ||	   CMPGTU  .L1	   A1,A2,A0 | ||
|  | 
 | ||
;; Add the final carry, then fold once: A1 = (A6 & 0xffff) + (A6 >> 16).
|  | 	   ADD	   .L1	   A0,A2,A6 | ||
|  | 	   EXTU    .S1	   A6,16,16,A7 | ||
|  | 	   SHRU    .S2X    A6,16,B0 | ||
|  | 	   NOP		   1 | ||
|  | 	   ADD	   .L1X    A7,B0,A1 | ||
|  | L23: | ||
;; Trailing half-word when bit 1 of the remaining length is set.
|  | 	   MVK	   .L2	   2,B0 | ||
|  | 	   AND	   .L2	   B3,B0,B0 | ||
|  |    [B0]    LDHU    .D1T1   *A4++,A0 | ||
|  | 	   NOP	   4 | ||
|  |    [B0]    ADD	   .L1	   A0,A1,A1 | ||
|  | L24: | ||
;; Trailing byte when bit 0 of the remaining length is set.
;; Big endian: added as byte << 8.  Little endian: added as is.
|  | 	   EXTU    .S2	   B3,31,31,B0 | ||
|  | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
|  |    [!B0]   BNOP    .S1	   L25,4 | ||
|  | || [B0]    LDBU    .D1T1   *A4,A0 | ||
|  | 	   SHL	   .S1	   A0,8,A0 | ||
|  | 	   ADD	   .L1	   A0,A1,A1 | ||
|  | L25: | ||
|  | #else | ||
|  |    [B0]    LDBU    .D1T1   *A4,A0 | ||
|  | 	   NOP	   4 | ||
|  |    [B0]    ADD	   .L1	   A0,A1,A1 | ||
|  | #endif | ||
;; Fold twice: (result & 0xffff) + (result >> 16), then add the carry of
;; that sum and keep the low 16 bits in A1.
|  | 	   EXTU    .S1	   A1,16,16,A0 | ||
|  | 	   SHRU    .S2X    A1,16,B0 | ||
|  | 	   NOP	   1 | ||
|  | 	   ADD	   .L1X    A0,B0,A0 | ||
|  | 	   SHRU    .S1	   A0,16,A1 | ||
|  | 	   ADD	   .L1	   A0,A1,A0 | ||
|  | 	   EXTU    .S1	   A0,16,16,A1 | ||
;; Byte swap for an odd start address: A2 = (A1 >> 8) & 0xff,
;; A0 = (A1 & 0xff) << 8; the OR is applied only when the odd flag (A3) is set.
|  | 	   EXTU    .S1	   A1,16,24,A2 | ||
|  | 
 | ||
|  | 	   EXTU    .S1	   A1,24,16,A0 | ||
|  | ||	   MV	   .L2X    A3,B0 | ||
|  | 
 | ||
|  |    [B0]    OR	   .L1	   A0,A2,A1 | ||
|  | L26: | ||
;; Return through the saved address in A5 with the result moved to A4.
|  | 	   NOP	   1 | ||
|  | 	   BNOP    .S2X    A5,4 | ||
|  | 	   MV	   .L1	   A1,A4 | ||
|  | ENDPROC(do_csum) | ||
|  | 
 | ||
|  | ;__wsum csum_partial(const void *buff, int len, __wsum wsum)
 | ||
|  | ;{
 | ||
|  | ;	unsigned int sum = (__force unsigned int)wsum;
 | ||
|  | ;	unsigned int result = do_csum(buff, len);
 | ||
|  | ;
 | ||
|  | ;	/* add in old sum, and carry.. */
 | ||
|  | ;	result += sum;
 | ||
|  | ;	if (sum > result)
 | ||
|  | ;		result += 1;
 | ||
|  | ;	return (__force __wsum)result;
 | ||
|  | ;}
 | ||
|  | ;
 | ||
;; csum_partial: call do_csum on (buff, len), then add the caller's wsum
;; (A6, kept in A8 across the call) plus the carry of that addition, and
;; return the total in A4.  The original return address (B3) is kept in A9
;; because the call reuses B3 as its link register.
;; NOTE(review): relies on do_csum leaving A8 and A9 untouched; the do_csum
;; in this file does not write either register.
|  | ENTRY(csum_partial) | ||
|  | 	   MV	   .L1X    B3,A9 | ||
|  | ||	   CALLP   .S2	   do_csum,B3 | ||
|  | ||	   MV	   .S1	   A6,A8 | ||
;; A1 = wsum + do_csum result; A0 = (wsum > A1) is the carry; A4 = A1 + A0.
;; These follow the return branch, presumably in its delay slots -- confirm
;; against the ISA.
|  | 	   BNOP    .S2X    A9,2 | ||
|  | 	   ADD	   .L1	   A8,A4,A1 | ||
|  | 	   CMPGTU  .L1	   A8,A1,A0 | ||
|  | 	   ADD	   .L1	   A1,A0,A4 | ||
|  | ENDPROC(csum_partial) | ||
|  | 
 | ||
|  | ;unsigned short
 | ||
|  | ;ip_compute_csum(unsigned char *buff, unsigned int len)
 | ||
|  | ;
 | ||
|  | ; A4:	buff
 | ||
|  | ; B4:	len
 | ||
|  | ; return checksum in A4
 | ||
|  | 
 | ||
;; ip_compute_csum: call do_csum on (buff, len), complement the result and
;; clear bits 16..31 so that only the low 16 bits are returned in A4.
;; The original return address (B3) is kept in A9 because the call reuses B3
;; as its link register.
;; NOTE(review): relies on do_csum leaving A9 untouched; the do_csum in this
;; file does not write it.
|  | ENTRY(ip_compute_csum) | ||
|  | 	   MV	   .L1X    B3,A9 | ||
|  | ||	   CALLP   .S2	   do_csum,B3 | ||
;; The NOT and CLR follow the return branch, presumably in its delay slots
;; -- confirm against the ISA.
|  | 	   BNOP    .S2X    A9,3 | ||
|  | 	   NOT	   .S1	   A4,A4 | ||
|  | 	   CLR     .S1	   A4,16,31,A4 | ||
|  | ENDPROC(ip_compute_csum) |