471 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			471 lines
		
	
	
	
		
			11 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| 
								 | 
							
									.file	"wm_sqrt.S"
							 | 
						||
| 
								 | 
							
								/*---------------------------------------------------------------------------+
							 | 
						||
| 
								 | 
							
								 |  wm_sqrt.S                                                                |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 | Fixed point arithmetic square root evaluation.                            |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 | Copyright (C) 1992,1993,1995,1997                                         |
							 | 
						||
| 
								 | 
							
								 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
							 | 
						||
| 
								 | 
							
								 |                       Australia.  E-mail billm@suburbia.net               |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 | Call from C as:                                                           |
							 | 
						||
| 
								 | 
							
								 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 +---------------------------------------------------------------------------*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*---------------------------------------------------------------------------+
							 | 
						||
| 
								 | 
							
								 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
							 | 
						||
| 
								 | 
							
								 |    returns the square root of n in n.                                     |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 |  Use Newton's method to compute the square root of a number, which must   |
							 | 
						||
| 
								 | 
							
								 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
							 | 
						||
| 
								 | 
							
								 |  Does not check the sign or tag of the argument.                          |
							 | 
						||
| 
								 | 
							
								 |  Sets the exponent, but not the sign or tag of the result.                |
							 | 
						||
| 
								 | 
							
								 |                                                                           |
							 | 
						||
| 
								 | 
							
								 |  The guess is kept in %esi:%edi                                           |
							 | 
						||
| 
								 | 
							
								 +---------------------------------------------------------------------------*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include "exception.h"
							 | 
						||
| 
								 | 
							
								#include "fpu_emu.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifndef NON_REENTRANT_FPU
							 | 
						||
| 
								 | 
							
								/*	Local storage on the stack: */
							 | 
						||
| 
								 | 
							
								#define FPU_accum_3	-4(%ebp)	/* ms word */
							 | 
						||
| 
								 | 
							
								#define FPU_accum_2	-8(%ebp)
							 | 
						||
| 
								 | 
							
								#define FPU_accum_1	-12(%ebp)
							 | 
						||
| 
								 | 
							
								#define FPU_accum_0	-16(%ebp)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * The de-normalised argument:
							 | 
						||
| 
								 | 
							
								 *                  sq_2                  sq_1              sq_0
							 | 
						||
| 
								 | 
							
								 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
							 | 
						||
| 
								 | 
							
								 *           ^ binary point here
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
							 | 
						||
| 
								 | 
							
								#define FPU_fsqrt_arg_1	-24(%ebp)
							 | 
						||
| 
								 | 
							
								#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#else
							 | 
						||
| 
								 | 
							
								/*	Local storage in a static area: */
							 | 
						||
| 
								 | 
							
								.data
							 | 
						||
| 
								 | 
							
									.align 4,0
							 | 
						||
| 
								 | 
							
								FPU_accum_3:
							 | 
						||
| 
								 | 
							
									.long	0		/* ms word */
							 | 
						||
| 
								 | 
							
								FPU_accum_2:
							 | 
						||
| 
								 | 
							
									.long	0
							 | 
						||
| 
								 | 
							
								FPU_accum_1:
							 | 
						||
| 
								 | 
							
									.long	0
							 | 
						||
| 
								 | 
							
								FPU_accum_0:
							 | 
						||
| 
								 | 
							
									.long	0
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* The de-normalised argument:
							 | 
						||
| 
								 | 
							
								                    sq_2                  sq_1              sq_0
							 | 
						||
| 
								 | 
							
								          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
							 | 
						||
| 
								 | 
							
								             ^ binary point here
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								FPU_fsqrt_arg_2:
							 | 
						||
| 
								 | 
							
									.long	0		/* ms word */
							 | 
						||
| 
								 | 
							
								FPU_fsqrt_arg_1:
							 | 
						||
| 
								 | 
							
									.long	0
							 | 
						||
| 
								 | 
							
								FPU_fsqrt_arg_0:
							 | 
						||
| 
								 | 
							
									.long	0		/* ls word, at most the ms bit is set */
							 | 
						||
| 
								 | 
							
								#endif /* NON_REENTRANT_FPU */ 
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								.text
							 | 
						||
| 
								 | 
							
								ENTRY(wm_sqrt)
							 | 
						||
| 
								 | 
							
									pushl	%ebp
							 | 
						||
| 
								 | 
							
									movl	%esp,%ebp
							 | 
						||
| 
								 | 
							
								#ifndef NON_REENTRANT_FPU
							 | 
						||
| 
								 | 
							
									subl	$28,%esp
							 | 
						||
| 
								 | 
							
								#endif /* NON_REENTRANT_FPU */
							 | 
						||
| 
								 | 
							
									pushl	%esi
							 | 
						||
| 
								 | 
							
									pushl	%edi
							 | 
						||
| 
								 | 
							
									pushl	%ebx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	PARAM1,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	SIGH(%esi),%eax
							 | 
						||
| 
								 | 
							
									movl	SIGL(%esi),%ecx
							 | 
						||
| 
								 | 
							
									xorl	%edx,%edx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* We use a rough linear estimate for the first guess.. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmpw	EXP_BIAS,EXP(%esi)
							 | 
						||
| 
								 | 
							
									jnz	sqrt_arg_ge_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%ecx
							 | 
						||
| 
								 | 
							
									rcrl	$1,%edx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_arg_ge_2:
							 | 
						||
| 
								 | 
							
								/* From here on, n is never accessed directly again until it is
							 | 
						||
| 
								 | 
							
								   replaced by the answer. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
							 | 
						||
| 
								 | 
							
									movl	%ecx,FPU_fsqrt_arg_1
							 | 
						||
| 
								 | 
							
									movl	%edx,FPU_fsqrt_arg_0
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Make a linear first estimate */
							 | 
						||
| 
								 | 
							
									shrl	$1,%eax
							 | 
						||
| 
								 | 
							
									addl	$0x40000000,%eax
							 | 
						||
| 
								 | 
							
									movl	$0xaaaaaaaa,%ecx
							 | 
						||
| 
								 | 
							
									mull	%ecx
							 | 
						||
| 
								 | 
							
									shll	%edx			/* max result was 7fff... */
							 | 
						||
| 
								 | 
							
									testl	$0x80000000,%edx	/* but min was 3fff... */
							 | 
						||
| 
								 | 
							
									jnz	sqrt_prelim_no_adjust
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	$0x80000000,%edx	/* round up */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_prelim_no_adjust:
							 | 
						||
| 
								 | 
							
									movl	%edx,%esi	/* Our first guess */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* We have now computed (approx)   (2 + x) / 3, which forms the basis
							 | 
						||
| 
								 | 
							
								   for a few iterations of Newton's method */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * From our initial estimate, three iterations are enough to get us
							 | 
						||
| 
								 | 
							
								 * to 30 bits or so. This will then allow two iterations at better
							 | 
						||
| 
								 | 
							
								 * precision to complete the process.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
							 | 
						||
| 
								 | 
							
									shrl	%ecx		/* Doing this first will prevent a divide */
							 | 
						||
| 
								 | 
							
												/* overflow later. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%ecx,%edx	/* msw of the arg / 2 */
							 | 
						||
| 
								 | 
							
									divl	%esi		/* current estimate */
							 | 
						||
| 
								 | 
							
									shrl	%esi		/* divide by 2 */
							 | 
						||
| 
								 | 
							
									addl	%eax,%esi	/* the new estimate */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%ecx,%edx
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									shrl	%esi
							 | 
						||
| 
								 | 
							
									addl	%eax,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%ecx,%edx
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									shrl	%esi
							 | 
						||
| 
								 | 
							
									addl	%eax,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
							 | 
						||
| 
								 | 
							
								 * we improve it to 60 bits or so.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * The strategy from now on is to compute new estimates from
							 | 
						||
| 
								 | 
							
								 *      guess := guess + (n - guess^2) / (2 * guess)
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* First, find the square of the guess */
							 | 
						||
| 
								 | 
							
									movl	%esi,%eax
							 | 
						||
| 
								 | 
							
									mull	%esi
							 | 
						||
| 
								 | 
							
								/* guess^2 now in %edx:%eax */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_1,%ecx
							 | 
						||
| 
								 | 
							
									subl	%ecx,%eax
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
							 | 
						||
| 
								 | 
							
									sbbl	%ecx,%edx
							 | 
						||
| 
								 | 
							
									jnc	sqrt_stage_2_positive
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Subtraction gives a negative result,
							 | 
						||
| 
								 | 
							
								   negate the result before division. */
							 | 
						||
| 
								 | 
							
									notl	%edx
							 | 
						||
| 
								 | 
							
									notl	%eax
							 | 
						||
| 
								 | 
							
									addl	$1,%eax
							 | 
						||
| 
								 | 
							
									adcl	$0,%edx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edx,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									jmp	sqrt_stage_2_finish
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_2_positive:
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edx,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									notl	%ecx
							 | 
						||
| 
								 | 
							
									notl	%eax
							 | 
						||
| 
								 | 
							
									addl	$1,%eax
							 | 
						||
| 
								 | 
							
									adcl	$0,%ecx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_2_finish:
							 | 
						||
| 
								 | 
							
									sarl	$1,%ecx		/* divide by 2 */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%eax
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Form the new estimate in %esi:%edi */
							 | 
						||
| 
								 | 
							
									movl	%eax,%edi
							 | 
						||
| 
								 | 
							
									addl	%ecx,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									jnz	sqrt_stage_2_done	/* result should be [1..2) */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef PARANOID
							 | 
						||
| 
								 | 
							
								/* It should be possible to get here only if the arg is ffff....ffff */
							 | 
						||
| 
								 | 
							
									cmp	$0xffffffff,FPU_fsqrt_arg_1
							 | 
						||
| 
								 | 
							
									jnz	sqrt_stage_2_error
							 | 
						||
| 
								 | 
							
								#endif /* PARANOID */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* The best rounded result. */
							 | 
						||
| 
								 | 
							
									xorl	%eax,%eax
							 | 
						||
| 
								 | 
							
									decl	%eax
							 | 
						||
| 
								 | 
							
									movl	%eax,%edi
							 | 
						||
| 
								 | 
							
									movl	%eax,%esi
							 | 
						||
| 
								 | 
							
									movl	$0x7fffffff,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef PARANOID
							 | 
						||
| 
								 | 
							
								sqrt_stage_2_error:
							 | 
						||
| 
								 | 
							
									pushl	EX_INTERNAL|0x213
							 | 
						||
| 
								 | 
							
									call	EXCEPTION
							 | 
						||
| 
								 | 
							
								#endif /* PARANOID */ 
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_2_done:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Now the square root has been computed to better than 60 bits. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Find the square of the guess. */
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax		/* ls word of guess */
							 | 
						||
| 
								 | 
							
									mull	%edi
							 | 
						||
| 
								 | 
							
									movl	%edx,FPU_accum_1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%esi,%eax
							 | 
						||
| 
								 | 
							
									mull	%esi
							 | 
						||
| 
								 | 
							
									movl	%edx,FPU_accum_3
							 | 
						||
| 
								 | 
							
									movl	%eax,FPU_accum_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax
							 | 
						||
| 
								 | 
							
									mull	%esi
							 | 
						||
| 
								 | 
							
									addl	%eax,FPU_accum_1
							 | 
						||
| 
								 | 
							
									adcl	%edx,FPU_accum_2
							 | 
						||
| 
								 | 
							
									adcl	$0,FPU_accum_3
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*	movl	%esi,%eax */
							 | 
						||
| 
								 | 
							
								/*	mull	%edi */
							 | 
						||
| 
								 | 
							
									addl	%eax,FPU_accum_1
							 | 
						||
| 
								 | 
							
									adcl	%edx,FPU_accum_2
							 | 
						||
| 
								 | 
							
									adcl	$0,FPU_accum_3
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
							 | 
						||
| 
								 | 
							
									subl	%eax,FPU_accum_1
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_1,%eax
							 | 
						||
| 
								 | 
							
									sbbl	%eax,FPU_accum_2
							 | 
						||
| 
								 | 
							
									movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
							 | 
						||
| 
								 | 
							
									sbbl	%eax,FPU_accum_3
							 | 
						||
| 
								 | 
							
									jnc	sqrt_stage_3_positive
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Subtraction gives a negative result,
							 | 
						||
| 
								 | 
							
								   negate the result before division */
							 | 
						||
| 
								 | 
							
									notl	FPU_accum_1
							 | 
						||
| 
								 | 
							
									notl	FPU_accum_2
							 | 
						||
| 
								 | 
							
									notl	FPU_accum_3
							 | 
						||
| 
								 | 
							
									addl	$1,FPU_accum_1
							 | 
						||
| 
								 | 
							
									adcl	$0,FPU_accum_2
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef PARANOID
							 | 
						||
| 
								 | 
							
									adcl	$0,FPU_accum_3	/* This must be zero */
							 | 
						||
| 
								 | 
							
									jz	sqrt_stage_3_no_error
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_3_error:
							 | 
						||
| 
								 | 
							
									pushl	EX_INTERNAL|0x207
							 | 
						||
| 
								 | 
							
									call	EXCEPTION
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_3_no_error:
							 | 
						||
| 
								 | 
							
								#endif /* PARANOID */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	FPU_accum_2,%edx
							 | 
						||
| 
								 | 
							
									movl	FPU_accum_1,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edx,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									sarl	$1,%ecx		/* divide by 2 */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%eax
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* prepare to round the result */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									addl	%ecx,%edi
							 | 
						||
| 
								 | 
							
									adcl	$0,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									jmp	sqrt_stage_3_finished
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_3_positive:
							 | 
						||
| 
								 | 
							
									movl	FPU_accum_2,%edx
							 | 
						||
| 
								 | 
							
									movl	FPU_accum_1,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edx,%eax
							 | 
						||
| 
								 | 
							
									divl	%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									sarl	$1,%ecx		/* divide by 2 */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%eax
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* prepare to round the result */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									notl	%eax		/* Negate the correction term */
							 | 
						||
| 
								 | 
							
									notl	%ecx
							 | 
						||
| 
								 | 
							
									addl	$1,%eax
							 | 
						||
| 
								 | 
							
									adcl	$0,%ecx		/* carry here ==> correction == 0 */
							 | 
						||
| 
								 | 
							
									adcl	$0xffffffff,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									addl	%ecx,%edi
							 | 
						||
| 
								 | 
							
									adcl	$0,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_stage_3_finished:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * The result in %esi:%edi:%esi should be good to about 90 bits here,
							 | 
						||
| 
								 | 
							
								 * and the rounding information here does not have sufficient accuracy
							 | 
						||
| 
								 | 
							
								 * in a few rare cases.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
									cmpl	$0xffffffe0,%eax
							 | 
						||
| 
								 | 
							
									ja	sqrt_near_exact_x
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmpl	$0x00000020,%eax
							 | 
						||
| 
								 | 
							
									jb	sqrt_near_exact
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmpl	$0x7fffffe0,%eax
							 | 
						||
| 
								 | 
							
									jb	sqrt_round_result
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmpl	$0x80000020,%eax
							 | 
						||
| 
								 | 
							
									jb	sqrt_get_more_precision
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_round_result:
							 | 
						||
| 
								 | 
							
								/* Set up for rounding operations */
							 | 
						||
| 
								 | 
							
									movl	%eax,%edx
							 | 
						||
| 
								 | 
							
									movl	%esi,%eax
							 | 
						||
| 
								 | 
							
									movl	%edi,%ebx
							 | 
						||
| 
								 | 
							
									movl	PARAM1,%edi
							 | 
						||
| 
								 | 
							
									movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
							 | 
						||
| 
								 | 
							
									jmp	fpu_reg_round
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_near_exact_x:
							 | 
						||
| 
								 | 
							
								/* First, the estimate must be rounded up. */
							 | 
						||
| 
								 | 
							
									addl	$1,%edi
							 | 
						||
| 
								 | 
							
									adcl	$0,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_near_exact:
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * This is an easy case because x^1/2 is monotonic.
							 | 
						||
| 
								 | 
							
								 * We need just find the square of our estimate, compare it
							 | 
						||
| 
								 | 
							
								 * with the argument, and deduce whether our estimate is
							 | 
						||
| 
								 | 
							
								 * above, below, or exact. We use the fact that the estimate
							 | 
						||
| 
								 | 
							
								 * is known to be accurate to about 90 bits.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax		/* ls word of guess */
							 | 
						||
| 
								 | 
							
									mull	%edi
							 | 
						||
| 
								 | 
							
									movl	%edx,%ebx		/* 2nd ls word of square */
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx		/* ls word of square */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax
							 | 
						||
| 
								 | 
							
									mull	%esi
							 | 
						||
| 
								 | 
							
									addl	%eax,%ebx
							 | 
						||
| 
								 | 
							
									addl	%eax,%ebx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef PARANOID
							 | 
						||
| 
								 | 
							
									cmp	$0xffffffb0,%ebx
							 | 
						||
| 
								 | 
							
									jb	sqrt_near_exact_ok
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmp	$0x00000050,%ebx
							 | 
						||
| 
								 | 
							
									ja	sqrt_near_exact_ok
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									pushl	EX_INTERNAL|0x214
							 | 
						||
| 
								 | 
							
									call	EXCEPTION
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_near_exact_ok:
							 | 
						||
| 
								 | 
							
								#endif /* PARANOID */ 
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									or	%ebx,%ebx
							 | 
						||
| 
								 | 
							
									js	sqrt_near_exact_small
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									jnz	sqrt_near_exact_large
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									or	%ebx,%edx
							 | 
						||
| 
								 | 
							
									jnz	sqrt_near_exact_large
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Our estimate is exactly the right answer */
							 | 
						||
| 
								 | 
							
									xorl	%eax,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_near_exact_small:
							 | 
						||
| 
								 | 
							
								/* Our estimate is too small */
							 | 
						||
| 
								 | 
							
									movl	$0x000000ff,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
									
							 | 
						||
| 
								 | 
							
								sqrt_near_exact_large:
							 | 
						||
| 
								 | 
							
								/* Our estimate is too large, we need to decrement it */
							 | 
						||
| 
								 | 
							
									subl	$1,%edi
							 | 
						||
| 
								 | 
							
									sbbl	$0,%esi
							 | 
						||
| 
								 | 
							
									movl	$0xffffff00,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_get_more_precision:
							 | 
						||
| 
								 | 
							
								/* This case is almost the same as the above, except we start
							 | 
						||
| 
								 | 
							
								   with an extra bit of precision in the estimate. */
							 | 
						||
| 
								 | 
							
									stc			/* The extra bit. */
							 | 
						||
| 
								 | 
							
									rcll	$1,%edi		/* Shift the estimate left one bit */
							 | 
						||
| 
								 | 
							
									rcll	$1,%esi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax		/* ls word of guess */
							 | 
						||
| 
								 | 
							
									mull	%edi
							 | 
						||
| 
								 | 
							
									movl	%edx,%ebx		/* 2nd ls word of square */
							 | 
						||
| 
								 | 
							
									movl	%eax,%ecx		/* ls word of square */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									movl	%edi,%eax
							 | 
						||
| 
								 | 
							
									mull	%esi
							 | 
						||
| 
								 | 
							
									addl	%eax,%ebx
							 | 
						||
| 
								 | 
							
									addl	%eax,%ebx
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Put our estimate back to its original value */
							 | 
						||
| 
								 | 
							
									stc			/* The ms bit. */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%esi		/* Shift the estimate left one bit */
							 | 
						||
| 
								 | 
							
									rcrl	$1,%edi
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#ifdef PARANOID
							 | 
						||
| 
								 | 
							
									cmp	$0xffffff60,%ebx
							 | 
						||
| 
								 | 
							
									jb	sqrt_more_prec_ok
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									cmp	$0x000000a0,%ebx
							 | 
						||
| 
								 | 
							
									ja	sqrt_more_prec_ok
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									pushl	EX_INTERNAL|0x215
							 | 
						||
| 
								 | 
							
									call	EXCEPTION
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_more_prec_ok:
							 | 
						||
| 
								 | 
							
								#endif /* PARANOID */ 
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									or	%ebx,%ebx
							 | 
						||
| 
								 | 
							
									js	sqrt_more_prec_small
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									jnz	sqrt_more_prec_large
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									or	%ebx,%ecx
							 | 
						||
| 
								 | 
							
									jnz	sqrt_more_prec_large
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* Our estimate is exactly the right answer */
							 | 
						||
| 
								 | 
							
									movl	$0x80000000,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								sqrt_more_prec_small:
							 | 
						||
| 
								 | 
							
								/* Our estimate is too small */
							 | 
						||
| 
								 | 
							
									movl	$0x800000ff,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 | 
						||
| 
								 | 
							
									
							 | 
						||
| 
								 | 
							
								sqrt_more_prec_large:
							 | 
						||
| 
								 | 
							
								/* Our estimate is too large */
							 | 
						||
| 
								 | 
							
									movl	$0x7fffff00,%eax
							 | 
						||
| 
								 | 
							
									jmp	sqrt_round_result
							 |