ARMv6 and greater introduced a new instruction ("bx") which can be used
to return from function calls.  Recent CPUs perform better when the
"bx lr" instruction is used rather than the "mov pc, lr" instruction,
and the ARM architecture manual (section A.4.1.1) strongly recommends
using this sequence.
We provide a new macro "ret" with all its variants for the condition
code which will resolve to the appropriate instruction.
Rather than doing this piecemeal, and missing some instances, change all
the "mov pc" instances to use the new macro, with the exception of
the "movs" instruction and the kprobes code.  This allows us to detect
the "mov pc, lr" case and fix it up - and also gives us the possibility
of deploying this for other registers depending on the CPU selection.
Reported-by: Will Deacon <will.deacon@arm.com>
Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S
Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood
Tested-by: Shawn Guo <shawn.guo@freescale.com>
Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs
Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385
Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci
Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M
Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
		
	
			
		
			
				
	
	
		
			363 lines
		
	
	
	
		
			8 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			363 lines
		
	
	
	
		
			8 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
/*
 | 
						|
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 | 
						|
 *
 | 
						|
 * Author: Nicolas Pitre <nico@fluxnic.net>
 | 
						|
 *   - contributed to gcc-3.4 on Sep 30, 2003
 | 
						|
 *   - adapted for the Linux kernel on Oct 2, 2003
 | 
						|
 */
 | 
						|
 | 
						|
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 | 
						|
 | 
						|
This file is free software; you can redistribute it and/or modify it
 | 
						|
under the terms of the GNU General Public License as published by the
 | 
						|
Free Software Foundation; either version 2, or (at your option) any
 | 
						|
later version.
 | 
						|
 | 
						|
In addition to the permissions in the GNU General Public License, the
 | 
						|
Free Software Foundation gives you unlimited permission to link the
 | 
						|
compiled version of this file into combinations with other programs,
 | 
						|
and to distribute those combinations without any restriction coming
 | 
						|
from the use of this file.  (The General Public License restrictions
 | 
						|
do apply in other respects; for example, they cover modification of
 | 
						|
the file, and distribution when not linked into a combine
 | 
						|
executable.)
 | 
						|
 | 
						|
This file is distributed in the hope that it will be useful, but
 | 
						|
WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License
 | 
						|
along with this program; see the file COPYING.  If not, write to
 | 
						|
the Free Software Foundation, 59 Temple Place - Suite 330,
 | 
						|
Boston, MA 02111-1307, USA.  */
 | 
						|
 | 
						|
 | 
						|
#include <linux/linkage.h>
 | 
						|
#include <asm/assembler.h>
 | 
						|
#include <asm/unwind.h>
 | 
						|
 | 
						|
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit division.
 *
 *   dividend  register holding the numerator.  It is reduced in place,
 *             but the unrolled loop may also subtract divisor fractions
 *             whose quotient bit has already been shifted out of
 *             curbit, so treat it as clobbered rather than as the
 *             remainder.
 *   divisor   register holding the denominator; clobbered.
 *   result    register that receives the quotient.
 *   curbit    scratch register: the quotient bit currently being tried.
 *
 * The call sites in this file only expand this macro after branching
 * away for a divisor of zero or one, for dividend <= divisor, and for
 * power-of-two divisors.  The condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result	@ shift that lines up the top set bits
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result	@ quotient bit matching the shifted divisor
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times: each pass tries the divisor
	@ shifted right by 0, 1, 2 and 3 bits and records the matching
	@ quotient bit whenever the subtraction fits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm

/*
 * ARM_DIV2_ORDER - compute the bit position of the set bit in a divisor.
 *
 *   divisor   register holding the divisor.  The call sites in this
 *             file only reach this macro when (divisor & (divisor - 1))
 *             is zero.  Left intact on ARMv5 and later, shifted down
 *             (clobbered) by the fallback path.
 *   order     register that receives the shift count, so that
 *             "x lsr order" equals x divided by the divisor.
 *
 * The fallback path clobbers the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zero count

#else

	@ Binary search: fold the divisor down by 16, 8 and 4 bits,
	@ accumulating the amount shifted in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ What is left is 1, 2, 4 or 8: 8 adds 3, the others add
	@ divisor / 2, that is 0, 1 or 2.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm

/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo.
 *
 *   dividend  register holding the numerator; holds the remainder when
 *             the macro falls through at label 5.
 *   divisor   register holding the denominator; clobbered.
 *   order     scratch register: number of bit positions the divisor was
 *             shifted up, then used as the countdown of remaining
 *             compare/subtract steps.
 *   spare     scratch register, only written on ARMv5 and later.
 *
 * The call sites in this file only expand this macro when the dividend
 * is greater than the divisor and the divisor is neither zero, one nor
 * a power of two.  The condition flags are clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare		@ shift that lines up the top set bits
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if no partial batch is left or the dividend is
	@ already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm

/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *
 * r1, r2, r3 and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	reteq	lr				@ divisor is 1: r0 already is the quotient
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2
	beq	12f				@ divisor is a power of 2: shift instead

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was accumulated in r2
	ret	lr

	@ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = dividend modulo divisor
 *
 * r1, r2, r3 and the condition flags may be modified.  A zero divisor
 * branches to Ldiv0.
 *
 * The conditional sequence up to "retls" settles every simple case
 * without branching:
 *   - divisor is 1, or dividend equals divisor: r0 is set to 0
 *   - dividend is lower than divisor:           r0 is left unchanged
 *   - divisor is a power of 2:                  r0 is masked with
 *                                               divisor - 1
 * Only a dividend greater than a non power of 2 divisor reaches
 * ARM_MOD_BODY.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient
 *
 * The division is done on absolute values; the sign of the result is
 * taken from bit 31 of ip = r0 ^ r1.  r1, r2, r3, ip and the condition
 * flags may be modified.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negate if the operand signs differed
	ret	lr

	@ Divisor was 1 or -1.  ip ^ r0 has the sign bit of the original
	@ divisor, so r0 is negated only when dividing by -1.
10:	teq	ip, r0
	rsbmi	r0, r0, #0
	ret	lr

	@ Flags still come from "cmp r3, r1": a smaller magnitude gives 0,
	@ an equal magnitude gives 1 or -1 depending on the sign in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3 - signed 32-bit modulo.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder, carrying the sign of the dividend
 *
 * The work is done on absolute values; the original dividend is kept
 * in ip so that the result can be negated at label 10.  r1, r2, r3, ip
 * and the condition flags may be modified.  A zero divisor branches to
 * Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ simple case already settled above

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ negative dividend: negate the remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI
 | 
						|
 | 
						|
/*
 * __aeabi_uidivmod - unsigned 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *
 * The quotient comes from __aeabi_uidiv; the remainder is rebuilt as
 * dividend - quotient * divisor.  r2 and r3 are clobbered; ip and lr
 * are saved and restored around the call.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod - signed 32-bit division returning both results.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient, r1 = remainder
 *
 * The quotient comes from __aeabi_idiv; the remainder is rebuilt as
 * dividend - quotient * divisor.  r2 and r3 are clobbered; ip and lr
 * are saved and restored around the call.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = saved dividend, r2 = saved divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif
 | 
						|
 | 
						|
/*
 * Ldiv0 - common target for a zero divisor.
 *
 * Reached by a branch from the division and modulo routines in this
 * file, so lr still holds the return address of their caller.  Saves
 * lr in an 8 byte stack frame, calls __div0 (defined outside this
 * file), then returns 0 in r0 to that caller.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!			@ push lr, moving sp down by 8
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8			@ return and release the 8 bytes
UNWIND(.fnend)
ENDPROC(Ldiv0)