471 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			471 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
|   | 	.file	"wm_sqrt.S" | ||
|  | /*---------------------------------------------------------------------------+ | ||
|  |  |  wm_sqrt.S                                                                | | ||
|  |  |                                                                           | | ||
|  |  | Fixed point arithmetic square root evaluation.                            | | ||
|  |  |                                                                           | | ||
|  |  | Copyright (C) 1992,1993,1995,1997                                         | | ||
|  |  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      | | ||
|  |  |                       Australia.  E-mail billm@suburbia.net               |
 | ||
|  |  |                                                                           | | ||
|  |  | Call from C as:                                                           | | ||
|  |  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     | | ||
|  |  |                                                                           | | ||
|  |  +---------------------------------------------------------------------------*/ | ||
|  | 
 | ||
|  | /*---------------------------------------------------------------------------+ | ||
|  |  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           | | ||
|  |  |    returns the square root of n in n.                                     | | ||
|  |  |                                                                           | | ||
|  |  |  Use Newton's method to compute the square root of a number, which must   | | ||
|  |  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     | | ||
|  |  |  Does not check the sign or tag of the argument.                          | | ||
|  |  |  Sets the exponent, but not the sign or tag of the result.                | | ||
|  |  |                                                                           | | ||
|  |  |  The guess is kept in %esi:%edi                                           | | ||
|  |  +---------------------------------------------------------------------------*/ | ||
|  | 
 | ||
|  | #include "exception.h" | ||
|  | #include "fpu_emu.h" | ||
|  | 
 | ||
|  | 
 | ||
|  | #ifndef NON_REENTRANT_FPU | ||
|  | /*	Local storage on the stack: */ | ||
|  | #define FPU_accum_3	-4(%ebp)	/* ms word */ | ||
|  | #define FPU_accum_2	-8(%ebp) | ||
|  | #define FPU_accum_1	-12(%ebp) | ||
|  | #define FPU_accum_0	-16(%ebp) | ||
|  | 
 | ||
|  | /* | ||
|  |  * The de-normalised argument: | ||
|  |  *                  sq_2                  sq_1              sq_0 | ||
|  |  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0 | ||
|  |  *           ^ binary point here | ||
|  |  */ | ||
|  | #define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */ | ||
|  | #define FPU_fsqrt_arg_1	-24(%ebp) | ||
|  | #define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */ | ||
|  | 
 | ||
|  | #else | ||
|  | /*	Local storage in a static area: */ | ||
|  | .data | ||
|  | 	.align 4,0 | ||
|  | FPU_accum_3: | ||
|  | 	.long	0		/* ms word */ | ||
|  | FPU_accum_2: | ||
|  | 	.long	0
 | ||
|  | FPU_accum_1: | ||
|  | 	.long	0
 | ||
|  | FPU_accum_0: | ||
|  | 	.long	0
 | ||
|  | 
 | ||
|  | /* The de-normalised argument: | ||
|  |                     sq_2                  sq_1              sq_0 | ||
|  |           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0 | ||
|  |              ^ binary point here | ||
|  |  */ | ||
|  | FPU_fsqrt_arg_2: | ||
|  | 	.long	0		/* ms word */ | ||
|  | FPU_fsqrt_arg_1: | ||
|  | 	.long	0
 | ||
|  | FPU_fsqrt_arg_0: | ||
|  | 	.long	0		/* ls word, at most the ms bit is set */ | ||
|  | #endif /* NON_REENTRANT_FPU */  | ||
|  | 
 | ||
|  | 
 | ||
|  | .text | ||
|  | ENTRY(wm_sqrt) | ||
|  | 	pushl	%ebp | ||
|  | 	movl	%esp,%ebp | ||
|  | #ifndef NON_REENTRANT_FPU | ||
|  | 	subl	$28,%esp | ||
|  | #endif /* NON_REENTRANT_FPU */ | ||
|  | 	pushl	%esi | ||
|  | 	pushl	%edi | ||
|  | 	pushl	%ebx | ||
|  | 
 | ||
|  | 	movl	PARAM1,%esi | ||
|  | 
 | ||
|  | 	movl	SIGH(%esi),%eax | ||
|  | 	movl	SIGL(%esi),%ecx | ||
|  | 	xorl	%edx,%edx | ||
|  | 
 | ||
|  | /* We use a rough linear estimate for the first guess.. */ | ||
|  | 
 | ||
|  | 	cmpw	EXP_BIAS,EXP(%esi) | ||
|  | 	jnz	sqrt_arg_ge_2 | ||
|  | 
 | ||
|  | 	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */ | ||
|  | 	rcrl	$1,%ecx | ||
|  | 	rcrl	$1,%edx | ||
|  | 
 | ||
|  | sqrt_arg_ge_2: | ||
|  | /* From here on, n is never accessed directly again until it is | ||
|  |    replaced by the answer. */ | ||
|  | 
 | ||
|  | 	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */ | ||
|  | 	movl	%ecx,FPU_fsqrt_arg_1 | ||
|  | 	movl	%edx,FPU_fsqrt_arg_0 | ||
|  | 
 | ||
|  | /* Make a linear first estimate */ | ||
|  | 	shrl	$1,%eax | ||
|  | 	addl	$0x40000000,%eax | ||
|  | 	movl	$0xaaaaaaaa,%ecx | ||
|  | 	mull	%ecx | ||
|  | 	shll	%edx			/* max result was 7fff... */ | ||
|  | 	testl	$0x80000000,%edx	/* but min was 3fff... */ | ||
|  | 	jnz	sqrt_prelim_no_adjust | ||
|  | 
 | ||
|  | 	movl	$0x80000000,%edx	/* round up */ | ||
|  | 
 | ||
|  | sqrt_prelim_no_adjust: | ||
|  | 	movl	%edx,%esi	/* Our first guess */ | ||
|  | 
 | ||
|  | /* We have now computed (approx)   (2 + x) / 3, which forms the basis | ||
|  |    for a few iterations of Newton's method */ | ||
|  | 
 | ||
|  | 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */ | ||
|  | 
 | ||
|  | /* | ||
|  |  * From our initial estimate, three iterations are enough to get us | ||
|  |  * to 30 bits or so. This will then allow two iterations at better | ||
|  |  * precision to complete the process. | ||
|  |  */ | ||
|  | 
 | ||
|  | /* Compute  (g + n/g)/2  at each iteration (g is the guess). */ | ||
|  | 	shrl	%ecx		/* Doing this first will prevent a divide */ | ||
|  | 				/* overflow later. */ | ||
|  | 
 | ||
|  | 	movl	%ecx,%edx	/* msw of the arg / 2 */ | ||
|  | 	divl	%esi		/* current estimate */ | ||
|  | 	shrl	%esi		/* divide by 2 */ | ||
|  | 	addl	%eax,%esi	/* the new estimate */ | ||
|  | 
 | ||
|  | 	movl	%ecx,%edx | ||
|  | 	divl	%esi | ||
|  | 	shrl	%esi | ||
|  | 	addl	%eax,%esi | ||
|  | 
 | ||
|  | 	movl	%ecx,%edx | ||
|  | 	divl	%esi | ||
|  | 	shrl	%esi | ||
|  | 	addl	%eax,%esi | ||
|  | 
 | ||
|  | /* | ||
|  |  * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | ||
|  |  * we improve it to 60 bits or so. | ||
|  |  * | ||
|  |  * The strategy from now on is to compute new estimates from | ||
|  |  *      guess := guess + (n - guess^2) / (2 * guess) | ||
|  |  */ | ||
|  | 
 | ||
|  | /* First, find the square of the guess */ | ||
|  | 	movl	%esi,%eax | ||
|  | 	mull	%esi | ||
|  | /* guess^2 now in %edx:%eax */ | ||
|  | 
 | ||
|  | 	movl	FPU_fsqrt_arg_1,%ecx | ||
|  | 	subl	%ecx,%eax | ||
|  | 	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */ | ||
|  | 	sbbl	%ecx,%edx | ||
|  | 	jnc	sqrt_stage_2_positive | ||
|  | 
 | ||
|  | /* Subtraction gives a negative result, | ||
|  |    negate the result before division. */ | ||
|  | 	notl	%edx | ||
|  | 	notl	%eax | ||
|  | 	addl	$1,%eax | ||
|  | 	adcl	$0,%edx | ||
|  | 
 | ||
|  | 	divl	%esi | ||
|  | 	movl	%eax,%ecx | ||
|  | 
 | ||
|  | 	movl	%edx,%eax | ||
|  | 	divl	%esi | ||
|  | 	jmp	sqrt_stage_2_finish | ||
|  | 
 | ||
|  | sqrt_stage_2_positive: | ||
|  | 	divl	%esi | ||
|  | 	movl	%eax,%ecx | ||
|  | 
 | ||
|  | 	movl	%edx,%eax | ||
|  | 	divl	%esi | ||
|  | 
 | ||
|  | 	notl	%ecx | ||
|  | 	notl	%eax | ||
|  | 	addl	$1,%eax | ||
|  | 	adcl	$0,%ecx | ||
|  | 
 | ||
|  | sqrt_stage_2_finish: | ||
|  | 	sarl	$1,%ecx		/* divide by 2 */ | ||
|  | 	rcrl	$1,%eax | ||
|  | 
 | ||
|  | 	/* Form the new estimate in %esi:%edi */ | ||
|  | 	movl	%eax,%edi | ||
|  | 	addl	%ecx,%esi | ||
|  | 
 | ||
|  | 	jnz	sqrt_stage_2_done	/* result should be [1..2) */ | ||
|  | 
 | ||
|  | #ifdef PARANOID | ||
|  | /* It should be possible to get here only if the arg is ffff....ffff */ | ||
|  | 	cmp	$0xffffffff,FPU_fsqrt_arg_1 | ||
|  | 	jnz	sqrt_stage_2_error | ||
|  | #endif /* PARANOID */ | ||
|  | 
 | ||
|  | /* The best rounded result. */ | ||
|  | 	xorl	%eax,%eax | ||
|  | 	decl	%eax | ||
|  | 	movl	%eax,%edi | ||
|  | 	movl	%eax,%esi | ||
|  | 	movl	$0x7fffffff,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 
 | ||
|  | #ifdef PARANOID | ||
|  | sqrt_stage_2_error: | ||
|  | 	pushl	EX_INTERNAL|0x213 | ||
|  | 	call	EXCEPTION | ||
|  | #endif /* PARANOID */  | ||
|  | 
 | ||
|  | sqrt_stage_2_done: | ||
|  | 
 | ||
|  | /* Now the square root has been computed to better than 60 bits. */ | ||
|  | 
 | ||
|  | /* Find the square of the guess. */ | ||
|  | 	movl	%edi,%eax		/* ls word of guess */ | ||
|  | 	mull	%edi | ||
|  | 	movl	%edx,FPU_accum_1 | ||
|  | 
 | ||
|  | 	movl	%esi,%eax | ||
|  | 	mull	%esi | ||
|  | 	movl	%edx,FPU_accum_3 | ||
|  | 	movl	%eax,FPU_accum_2 | ||
|  | 
 | ||
|  | 	movl	%edi,%eax | ||
|  | 	mull	%esi | ||
|  | 	addl	%eax,FPU_accum_1 | ||
|  | 	adcl	%edx,FPU_accum_2 | ||
|  | 	adcl	$0,FPU_accum_3 | ||
|  | 
 | ||
|  | /*	movl	%esi,%eax */ | ||
|  | /*	mull	%edi */ | ||
|  | 	addl	%eax,FPU_accum_1 | ||
|  | 	adcl	%edx,FPU_accum_2 | ||
|  | 	adcl	$0,FPU_accum_3 | ||
|  | 
 | ||
|  | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | ||
|  | 
 | ||
|  | 	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */ | ||
|  | 	subl	%eax,FPU_accum_1 | ||
|  | 	movl	FPU_fsqrt_arg_1,%eax | ||
|  | 	sbbl	%eax,FPU_accum_2 | ||
|  | 	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */ | ||
|  | 	sbbl	%eax,FPU_accum_3 | ||
|  | 	jnc	sqrt_stage_3_positive | ||
|  | 
 | ||
|  | /* Subtraction gives a negative result, | ||
|  |    negate the result before division */ | ||
|  | 	notl	FPU_accum_1 | ||
|  | 	notl	FPU_accum_2 | ||
|  | 	notl	FPU_accum_3 | ||
|  | 	addl	$1,FPU_accum_1 | ||
|  | 	adcl	$0,FPU_accum_2 | ||
|  | 
 | ||
|  | #ifdef PARANOID | ||
|  | 	adcl	$0,FPU_accum_3	/* This must be zero */ | ||
|  | 	jz	sqrt_stage_3_no_error | ||
|  | 
 | ||
|  | sqrt_stage_3_error: | ||
|  | 	pushl	EX_INTERNAL|0x207 | ||
|  | 	call	EXCEPTION | ||
|  | 
 | ||
|  | sqrt_stage_3_no_error: | ||
|  | #endif /* PARANOID */ | ||
|  | 
 | ||
|  | 	movl	FPU_accum_2,%edx | ||
|  | 	movl	FPU_accum_1,%eax | ||
|  | 	divl	%esi | ||
|  | 	movl	%eax,%ecx | ||
|  | 
 | ||
|  | 	movl	%edx,%eax | ||
|  | 	divl	%esi | ||
|  | 
 | ||
|  | 	sarl	$1,%ecx		/* divide by 2 */ | ||
|  | 	rcrl	$1,%eax | ||
|  | 
 | ||
|  | 	/* prepare to round the result */ | ||
|  | 
 | ||
|  | 	addl	%ecx,%edi | ||
|  | 	adcl	$0,%esi | ||
|  | 
 | ||
|  | 	jmp	sqrt_stage_3_finished | ||
|  | 
 | ||
|  | sqrt_stage_3_positive: | ||
|  | 	movl	FPU_accum_2,%edx | ||
|  | 	movl	FPU_accum_1,%eax | ||
|  | 	divl	%esi | ||
|  | 	movl	%eax,%ecx | ||
|  | 
 | ||
|  | 	movl	%edx,%eax | ||
|  | 	divl	%esi | ||
|  | 
 | ||
|  | 	sarl	$1,%ecx		/* divide by 2 */ | ||
|  | 	rcrl	$1,%eax | ||
|  | 
 | ||
|  | 	/* prepare to round the result */ | ||
|  | 
 | ||
|  | 	notl	%eax		/* Negate the correction term */ | ||
|  | 	notl	%ecx | ||
|  | 	addl	$1,%eax | ||
|  | 	adcl	$0,%ecx		/* carry here ==> correction == 0 */ | ||
|  | 	adcl	$0xffffffff,%esi | ||
|  | 
 | ||
|  | 	addl	%ecx,%edi | ||
|  | 	adcl	$0,%esi | ||
|  | 
 | ||
|  | sqrt_stage_3_finished: | ||
|  | 
 | ||
|  | /* | ||
|  |  * The result in %esi:%edi:%esi should be good to about 90 bits here, | ||
|  |  * and the rounding information here does not have sufficient accuracy | ||
|  |  * in a few rare cases. | ||
|  |  */ | ||
|  | 	cmpl	$0xffffffe0,%eax | ||
|  | 	ja	sqrt_near_exact_x | ||
|  | 
 | ||
|  | 	cmpl	$0x00000020,%eax | ||
|  | 	jb	sqrt_near_exact | ||
|  | 
 | ||
|  | 	cmpl	$0x7fffffe0,%eax | ||
|  | 	jb	sqrt_round_result | ||
|  | 
 | ||
|  | 	cmpl	$0x80000020,%eax | ||
|  | 	jb	sqrt_get_more_precision | ||
|  | 
 | ||
|  | sqrt_round_result: | ||
|  | /* Set up for rounding operations */ | ||
|  | 	movl	%eax,%edx | ||
|  | 	movl	%esi,%eax | ||
|  | 	movl	%edi,%ebx | ||
|  | 	movl	PARAM1,%edi | ||
|  | 	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */ | ||
|  | 	jmp	fpu_reg_round | ||
|  | 
 | ||
|  | 
 | ||
|  | sqrt_near_exact_x: | ||
|  | /* First, the estimate must be rounded up. */ | ||
|  | 	addl	$1,%edi | ||
|  | 	adcl	$0,%esi | ||
|  | 
 | ||
|  | sqrt_near_exact: | ||
|  | /* | ||
|  |  * This is an easy case because x^1/2 is monotonic. | ||
|  |  * We need just find the square of our estimate, compare it | ||
|  |  * with the argument, and deduce whether our estimate is | ||
|  |  * above, below, or exact. We use the fact that the estimate | ||
|  |  * is known to be accurate to about 90 bits. | ||
|  |  */ | ||
|  | 	movl	%edi,%eax		/* ls word of guess */ | ||
|  | 	mull	%edi | ||
|  | 	movl	%edx,%ebx		/* 2nd ls word of square */ | ||
|  | 	movl	%eax,%ecx		/* ls word of square */ | ||
|  | 
 | ||
|  | 	movl	%edi,%eax | ||
|  | 	mull	%esi | ||
|  | 	addl	%eax,%ebx | ||
|  | 	addl	%eax,%ebx | ||
|  | 
 | ||
|  | #ifdef PARANOID | ||
|  | 	cmp	$0xffffffb0,%ebx | ||
|  | 	jb	sqrt_near_exact_ok | ||
|  | 
 | ||
|  | 	cmp	$0x00000050,%ebx | ||
|  | 	ja	sqrt_near_exact_ok | ||
|  | 
 | ||
|  | 	pushl	EX_INTERNAL|0x214 | ||
|  | 	call	EXCEPTION | ||
|  | 
 | ||
|  | sqrt_near_exact_ok: | ||
|  | #endif /* PARANOID */  | ||
|  | 
 | ||
|  | 	or	%ebx,%ebx | ||
|  | 	js	sqrt_near_exact_small | ||
|  | 
 | ||
|  | 	jnz	sqrt_near_exact_large | ||
|  | 
 | ||
|  | 	or	%ebx,%edx | ||
|  | 	jnz	sqrt_near_exact_large | ||
|  | 
 | ||
|  | /* Our estimate is exactly the right answer */ | ||
|  | 	xorl	%eax,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 
 | ||
|  | sqrt_near_exact_small: | ||
|  | /* Our estimate is too small */ | ||
|  | 	movl	$0x000000ff,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 	 | ||
|  | sqrt_near_exact_large: | ||
|  | /* Our estimate is too large, we need to decrement it */ | ||
|  | 	subl	$1,%edi | ||
|  | 	sbbl	$0,%esi | ||
|  | 	movl	$0xffffff00,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 
 | ||
|  | 
 | ||
|  | sqrt_get_more_precision: | ||
|  | /* This case is almost the same as the above, except we start | ||
|  |    with an extra bit of precision in the estimate. */ | ||
|  | 	stc			/* The extra bit. */ | ||
|  | 	rcll	$1,%edi		/* Shift the estimate left one bit */ | ||
|  | 	rcll	$1,%esi | ||
|  | 
 | ||
|  | 	movl	%edi,%eax		/* ls word of guess */ | ||
|  | 	mull	%edi | ||
|  | 	movl	%edx,%ebx		/* 2nd ls word of square */ | ||
|  | 	movl	%eax,%ecx		/* ls word of square */ | ||
|  | 
 | ||
|  | 	movl	%edi,%eax | ||
|  | 	mull	%esi | ||
|  | 	addl	%eax,%ebx | ||
|  | 	addl	%eax,%ebx | ||
|  | 
 | ||
|  | /* Put our estimate back to its original value */ | ||
|  | 	stc			/* The ms bit. */ | ||
|  | 	rcrl	$1,%esi		/* Shift the estimate left one bit */ | ||
|  | 	rcrl	$1,%edi | ||
|  | 
 | ||
|  | #ifdef PARANOID | ||
|  | 	cmp	$0xffffff60,%ebx | ||
|  | 	jb	sqrt_more_prec_ok | ||
|  | 
 | ||
|  | 	cmp	$0x000000a0,%ebx | ||
|  | 	ja	sqrt_more_prec_ok | ||
|  | 
 | ||
|  | 	pushl	EX_INTERNAL|0x215 | ||
|  | 	call	EXCEPTION | ||
|  | 
 | ||
|  | sqrt_more_prec_ok: | ||
|  | #endif /* PARANOID */  | ||
|  | 
 | ||
|  | 	or	%ebx,%ebx | ||
|  | 	js	sqrt_more_prec_small | ||
|  | 
 | ||
|  | 	jnz	sqrt_more_prec_large | ||
|  | 
 | ||
|  | 	or	%ebx,%ecx | ||
|  | 	jnz	sqrt_more_prec_large | ||
|  | 
 | ||
|  | /* Our estimate is exactly the right answer */ | ||
|  | 	movl	$0x80000000,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 
 | ||
|  | sqrt_more_prec_small: | ||
|  | /* Our estimate is too small */ | ||
|  | 	movl	$0x800000ff,%eax | ||
|  | 	jmp	sqrt_round_result | ||
|  | 	 | ||
|  | sqrt_more_prec_large: | ||
|  | /* Our estimate is too large */ | ||
|  | 	movl	$0x7fffff00,%eax | ||
|  | 	jmp	sqrt_round_result |