/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (respectively one cycle / byte) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */

#include <asm/linkage.h>

/*
 * strcmp - compare two NUL-terminated strings.
 *
 * In:   r0 = first string, r1 = second string
 * Out:  r0 = 0 if the strings are equal, a value with bit 31 set
 *            (negative) if the first string sorts lower, a positive
 *            value otherwise.  Bytes are compared as unsigned values.
 * Uses: r2, r3, r4, r5, r12 and the status flags; returns via blink.
 *
 * If both pointers are word aligned the strings are compared one word
 * at a time (.Lwordloop); otherwise they are compared one byte at a
 * time (.Lcharloop).
 *
 * Every return is "j_s.d [blink]": the instruction that follows the
 * jump sits in its delay slot and still executes, which is where the
 * final value of r0 is produced.
 */
ARC_ENTRY strcmp
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; r2 = (r0 | r1) & 3
	brne	r2,0,.Lcharloop		; a pointer is not word aligned
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]		; r2 = next word of string 1, r0 += 4
	ld.ab	r3,[r1,4]		; r3 = next word of string 2, r1 += 4
	nop_s				; NOTE(review): presumably load-use / alignment padding - confirm
	/* r4 = (r2 - 0x01010101) & ~r2 & 0x80808080: non-zero when the
	   word from string 1 holds a zero byte.  */
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
	/* The words differ and r2 holds no terminator.  On big endian the
	   first byte is the most significant one, so the words can be
	   compared directly; on little endian everything but the least
	   significant differing byte has to be masked off first.  */
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1			; result is 1 unless r2 < r3 ...
	j_s.d	[blink]
	bset.lo	r0,r0,31		; ... in which case bit 31 is set as well

	.balign	4
#ifdef __LITTLE_ENDIAN__
	/* A terminator was seen in r2.  Same masking as above, but the zero
	   indicator is or-ed in so that nothing beyond the terminator takes
	   part in the comparison.  */
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3		; 0 when the strings are equal
	mov.hi	r0,1			; r2 > r3: return 1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; r2 < r3: force a negative result
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

	.balign	4
	/* Byte-at-a-time path, taken when either pointer is not word
	   aligned.  Stops at the terminator of string 1 or at the first
	   mismatch and returns the difference of the two bytes.  */
.Lcharloop:
	ldb.ab	r2,[r0,1]		; r2 = next byte of string 1, r0 += 1
	ldb.ab	r3,[r1,1]		; r3 = next byte of string 2, r1 += 1
	nop_s				; NOTE(review): presumably load-use / alignment padding - confirm
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3		; delay slot: r0 = byte1 - byte2
ARC_EXIT strcmp