/*
 *  linux/arch/arm/lib/delay.S
 *
 *  Copyright (C) 1995, 1996 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/delay.h>
		.text

/*
 * Literal pool: address of the loops_per_jiffy variable and the
 * UDELAY_MULT constant, both loaded PC-relative by the code below.
 */
.LC0:		.word	loops_per_jiffy
.LC1:		.word	UDELAY_MULT

/*
 * r0  <= 2000
 * lpj <= 0x01ffffff (max. 3355 bogomips)
 * HZ  <= 1000
 */

/*
 * __loop_udelay
 *   In:     r0 = delay request (presumably microseconds; see the limits
 *           above and the formula below - confirm against callers)
 *   Does:   r0 *= UDELAY_MULT, then falls through into
 *           __loop_const_udelay.
 *
 * __loop_const_udelay
 *   In:     r0 = already scaled delay, 0 <= r0 <= 0x7fffff06
 *   Does:   converts r0 into a loop count using loops_per_jiffy,
 *           returns at once when the count is zero, otherwise falls
 *           through into __loop_delay.
 *
 * Clobbers: r0, r1, r2 and the condition flags.
 *
 * The scaling is done in 32 bits by discarding low bits in three steps
 * (14 + 10 + 6 = 30 bits in total).  r1 holds 0xffffffff, so
 * "r1, lsr #32-n" is 2^n - 1; adding it before a right shift by n
 * rounds the result up instead of down.
 *
 * NOTE(review): the "max =" annotations below appear to describe the
 * values without the round-up additions.  With rounding,
 * (0x7fffff06 + 0x3fff) >> 14 is 0x00020000 and
 * (0x01ffffff + 0x3ff) >> 10 is 0x00008000, whose 32-bit product wraps
 * to 0 and would make the routine return without delaying.  Confirm
 * whether both inputs can reach these bounds at the same time.
 */
ENTRY(__loop_udelay)
		ldr	r2, .LC1
		mul	r0, r2, r0
ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
		mov	r1, #-1			@ all ones, source of the rounding terms
		ldr	r2, .LC0
		ldr	r2, [r2]		@ max = 0x01ffffff
		add	r0, r0, r1, lsr #32-14	@ round up before dropping 14 bits
		mov	r0, r0, lsr #14		@ max = 0x0001ffff
		add	r2, r2, r1, lsr #32-10	@ round up before dropping 10 bits
		mov	r2, r2, lsr #10		@ max = 0x00007fff
		mul	r0, r2, r0		@ max = 2^32-1
		add	r0, r0, r1, lsr #32-6	@ round up before dropping 6 bits
		movs	r0, r0, lsr #6		@ sets Z when no loops are needed
		reteq	lr

/*
 * loops = r0 * HZ * loops_per_jiffy / 1000000
 */

/*
 * Keep the delay loop 8-byte aligned.  On some cores (Cortex-A8 and
 * ARM1136 were reported) the two-instruction loop needs more cycles per
 * iteration when it starts at an address that is only 4-byte aligned,
 * which skews the BogoMIPS / loops_per_jiffy calibration.
 */
		.align 3

/*
 * __loop_delay
 *   In:     r0 = number of loop iterations
 *   Does:   decrements r0 until it reaches zero, then returns.  The
 *           "hi" condition also stops the loop when the subtraction
 *           borrows, so r0 = 0 on entry returns after one pass.
 *   Clobbers: r0 and the condition flags.
 */
@ Delay routine
ENTRY(__loop_delay)
		subs	r0, r0, #1
#if 0
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
		retls	lr
		subs	r0, r0, #1
#endif
		bhi	__loop_delay
		ret	lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)