593 lines
		
	
	
	
		
			15 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			593 lines
		
	
	
	
		
			15 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 *  Copyright (C) 2003-2013 Altera Corporation
							 | 
						||
| 
								 | 
							
								 *  All rights reserved.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is free software; you can redistribute it and/or modify
							 | 
						||
| 
								 | 
							
								 * it under the terms of the GNU General Public License as published by
							 | 
						||
| 
								 | 
							
								 * the Free Software Foundation; either version 2 of the License, or
							 | 
						||
| 
								 | 
							
								 * (at your option) any later version.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is distributed in the hope that it will be useful,
							 | 
						||
| 
								 | 
							
								 * but WITHOUT ANY WARRANTY; without even the implied warranty of
							 | 
						||
| 
								 | 
							
								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
							 | 
						||
| 
								 | 
							
								 * GNU General Public License for more details.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * You should have received a copy of the GNU General Public License
							 | 
						||
| 
								 | 
							
								 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <linux/linkage.h>
							 | 
						||
| 
								 | 
							
								#include <asm/entry.h>
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								.set noat
							 | 
						||
| 
								 | 
							
								.set nobreak
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								* Explicitly allow the use of r1 (the assembler temporary register)
							 | 
						||
| 
								 | 
							
								* within this code. This register is normally reserved for the use of
							 | 
						||
| 
								 | 
							
								* the compiler.
							 | 
						||
| 
								 | 
							
								*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								ENTRY(instruction_trap)
							 | 
						||
| 
								 | 
							
									ldw	r1, PT_R1(sp)		// Restore registers
							 | 
						||
| 
								 | 
							
									ldw	r2, PT_R2(sp)
							 | 
						||
| 
								 | 
							
									ldw	r3, PT_R3(sp)
							 | 
						||
| 
								 | 
							
									ldw	r4, PT_R4(sp)
							 | 
						||
| 
								 | 
							
									ldw	r5, PT_R5(sp)
							 | 
						||
| 
								 | 
							
									ldw	r6, PT_R6(sp)
							 | 
						||
| 
								 | 
							
									ldw	r7, PT_R7(sp)
							 | 
						||
| 
								 | 
							
									ldw	r8, PT_R8(sp)
							 | 
						||
| 
								 | 
							
									ldw	r9, PT_R9(sp)
							 | 
						||
| 
								 | 
							
									ldw	r10, PT_R10(sp)
							 | 
						||
| 
								 | 
							
									ldw	r11, PT_R11(sp)
							 | 
						||
| 
								 | 
							
									ldw	r12, PT_R12(sp)
							 | 
						||
| 
								 | 
							
									ldw	r13, PT_R13(sp)
							 | 
						||
| 
								 | 
							
									ldw	r14, PT_R14(sp)
							 | 
						||
| 
								 | 
							
									ldw	r15, PT_R15(sp)
							 | 
						||
| 
								 | 
							
									ldw	ra, PT_RA(sp)
							 | 
						||
| 
								 | 
							
									ldw	fp, PT_FP(sp)
							 | 
						||
| 
								 | 
							
									ldw	gp, PT_GP(sp)
							 | 
						||
| 
								 | 
							
									ldw	et, PT_ESTATUS(sp)
							 | 
						||
| 
								 | 
							
									wrctl	estatus, et
							 | 
						||
| 
								 | 
							
									ldw	ea, PT_EA(sp)
							 | 
						||
| 
								 | 
							
									ldw	et, PT_SP(sp)		/* backup sp in et */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									addi	sp, sp, PT_REGS_SIZE
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* INSTRUCTION EMULATION
							 | 
						||
| 
								 | 
							
									*  ---------------------
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Nios II processors generate exceptions for unimplemented instructions.
							 | 
						||
| 
								 | 
							
									* The routines below emulate these instructions.  Depending on the
							 | 
						||
| 
								 | 
							
									* processor core, the only instructions that might need to be emulated
							 | 
						||
| 
								 | 
							
									* are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The emulations match the instructions, except for the following
							 | 
						||
| 
								 | 
							
									* limitations:
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* 1) The emulation routines do not emulate the use of the exception
							 | 
						||
| 
								 | 
							
									*    temporary register (et) as a source operand because the exception
							 | 
						||
| 
								 | 
							
									*    handler already has modified it.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* 2) The routines do not emulate the use of the stack pointer (sp) or
							 | 
						||
| 
								 | 
							
									*    the exception return address register (ea) as a destination because
							 | 
						||
| 
								 | 
							
									*    modifying these registers crashes the exception handler or the
							 | 
						||
| 
								 | 
							
									*    interrupted routine.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Detailed Design
							 | 
						||
| 
								 | 
							
									* ---------------
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The emulation routines expect the contents of integer registers r0-r31
							 | 
						||
| 
								 | 
							
									* to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp).  The
							 | 
						||
| 
								 | 
							
									* routines retrieve source operands from the stack and modify the
							 | 
						||
| 
								 | 
							
									* destination register's value on the stack prior to the end of the
							 | 
						||
| 
								 | 
							
									* exception handler.  Then all registers except the destination register
							 | 
						||
| 
								 | 
							
									* are restored to their previous values.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The instruction that causes the exception is found at address -4(ea).
							 | 
						||
| 
								 | 
							
									* The instruction's OP and OPX fields identify the operation to be
							 | 
						||
| 
								 | 
							
									* performed.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* One instruction, muli, is an I-type instruction that is identified by
							 | 
						||
| 
								 | 
							
									* an OP field of 0x24.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
							 | 
						||
| 
								 | 
							
									*           27    22                6      0    <-- LSB of field
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The remaining emulated instructions are R-type and have an OP field
							 | 
						||
| 
								 | 
							
									* of 0x3a.  Their OPX fields identify them.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
							 | 
						||
| 
								 | 
							
									*           27    22    17     11     6      0  <-- LSB of field
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Opcode Encoding.  muli is identified by its OP value.  Then OPX & 0x02
							 | 
						||
| 
								 | 
							
									* is used to differentiate between the division opcodes and the
							 | 
						||
| 
								 | 
							
									* remaining multiplication opcodes.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Instruction   OP      OPX    OPX & 0x02
							 | 
						||
| 
								 | 
							
									* -----------   ----    ----   ----------
							 | 
						||
| 
								 | 
							
									* muli          0x24
							 | 
						||
| 
								 | 
							
									* divu          0x3a    0x24         0
							 | 
						||
| 
								 | 
							
									* div           0x3a    0x25         0
							 | 
						||
| 
								 | 
							
									* mul           0x3a    0x27      != 0
							 | 
						||
| 
								 | 
							
									* mulxuu        0x3a    0x07      != 0
							 | 
						||
| 
								 | 
							
									* mulxsu        0x3a    0x17      != 0
							 | 
						||
| 
								 | 
							
									* mulxss        0x3a    0x1f      != 0
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									* Save everything on the stack to make it easy for the emulation
							 | 
						||
| 
								 | 
							
									* routines to retrieve the source register operands.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									addi sp, sp, -128
							 | 
						||
| 
								 | 
							
									stw zero, 0(sp)	/* Save zero on stack to avoid special case for r0. */
							 | 
						||
| 
								 | 
							
									stw r1, 4(sp)
							 | 
						||
| 
								 | 
							
									stw r2,  8(sp)
							 | 
						||
| 
								 | 
							
									stw r3, 12(sp)
							 | 
						||
| 
								 | 
							
									stw r4, 16(sp)
							 | 
						||
| 
								 | 
							
									stw r5, 20(sp)
							 | 
						||
| 
								 | 
							
									stw r6, 24(sp)
							 | 
						||
| 
								 | 
							
									stw r7, 28(sp)
							 | 
						||
| 
								 | 
							
									stw r8, 32(sp)
							 | 
						||
| 
								 | 
							
									stw r9, 36(sp)
							 | 
						||
| 
								 | 
							
									stw r10, 40(sp)
							 | 
						||
| 
								 | 
							
									stw r11, 44(sp)
							 | 
						||
| 
								 | 
							
									stw r12, 48(sp)
							 | 
						||
| 
								 | 
							
									stw r13, 52(sp)
							 | 
						||
| 
								 | 
							
									stw r14, 56(sp)
							 | 
						||
| 
								 | 
							
									stw r15, 60(sp)
							 | 
						||
| 
								 | 
							
									stw r16, 64(sp)
							 | 
						||
| 
								 | 
							
									stw r17, 68(sp)
							 | 
						||
| 
								 | 
							
									stw r18, 72(sp)
							 | 
						||
| 
								 | 
							
									stw r19, 76(sp)
							 | 
						||
| 
								 | 
							
									stw r20, 80(sp)
							 | 
						||
| 
								 | 
							
									stw r21, 84(sp)
							 | 
						||
| 
								 | 
							
									stw r22, 88(sp)
							 | 
						||
| 
								 | 
							
									stw r23, 92(sp)
							 | 
						||
| 
								 | 
							
										/* Don't bother to save et.  It's already been changed. */
							 | 
						||
| 
								 | 
							
									rdctl r5, estatus
							 | 
						||
| 
								 | 
							
									stw r5,  100(sp)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									stw gp, 104(sp)
							 | 
						||
| 
								 | 
							
									stw et, 108(sp)	/* et contains previous sp value. */
							 | 
						||
| 
								 | 
							
									stw fp, 112(sp)
							 | 
						||
| 
								 | 
							
									stw ea, 116(sp)
							 | 
						||
| 
								 | 
							
									stw ra, 120(sp)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									* Split the instruction into its fields.  We need 4*A, 4*B, and 4*C as
							 | 
						||
| 
								 | 
							
									* offsets to the stack pointer for access to the stored register values.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									ldw r2,-4(ea)	/* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
							 | 
						||
| 
								 | 
							
									roli r3, r2, 7	/* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
							 | 
						||
| 
								 | 
							
									roli r4, r3, 3	/* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
							 | 
						||
| 
								 | 
							
									roli r5, r4, 2	/* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
							 | 
						||
| 
								 | 
							
									srai r4, r4, 16	/* r4 = (sign-extended) IMM16 */
							 | 
						||
| 
								 | 
							
									roli r6, r5, 5	/* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
							 | 
						||
| 
								 | 
							
									andi r2, r2, 0x3f	/* r2 = 00000000000000000000000000,PPPPPP */
							 | 
						||
| 
								 | 
							
									andi r3, r3, 0x7c	/* r3 = 0000000000000000000000000,AAAAA,00 */
							 | 
						||
| 
								 | 
							
									andi r5, r5, 0x7c	/* r5 = 0000000000000000000000000,BBBBB,00 */
							 | 
						||
| 
								 | 
							
									andi r6, r6, 0x7c	/* r6 = 0000000000000000000000000,CCCCC,00 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r2 = OP
							 | 
						||
| 
								 | 
							
									* r3 = 4*A
							 | 
						||
| 
								 | 
							
									* r4 = IMM16 (sign extended)
							 | 
						||
| 
								 | 
							
									* r5 = 4*B
							 | 
						||
| 
								 | 
							
									* r6 = 4*C
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									* Get the operands.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* It is necessary to check for muli because it uses an I-type
							 | 
						||
| 
								 | 
							
									* instruction format, while the other instructions have an R-type
							 | 
						||
| 
								 | 
							
									* format.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*  Prepare for either multiplication or division loop.
							 | 
						||
| 
								 | 
							
									*  They both loop 32 times.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									movi r14, 32
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									add  r3, r3, sp		/* r3 = address of A-operand. */
							 | 
						||
| 
								 | 
							
									ldw  r3, 0(r3)		/* r3 = A-operand. */
							 | 
						||
| 
								 | 
							
									movi r7, 0x24		/* muli opcode (I-type instruction format) */
							 | 
						||
| 
								 | 
							
									beq r2, r7, mul_immed /* muli doesn't use the B register as a source */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									add  r5, r5, sp		/* r5 = address of B-operand. */
							 | 
						||
| 
								 | 
							
									ldw  r5, 0(r5)		/* r5 = B-operand. */
							 | 
						||
| 
								 | 
							
												/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
							 | 
						||
| 
								 | 
							
												/* IMM16 not needed, align OPX portion */
							 | 
						||
| 
								 | 
							
												/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
							 | 
						||
| 
								 | 
							
									srli r4, r4, 5		/* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
							 | 
						||
| 
								 | 
							
									andi r4, r4, 0x3f	/* r4 = 00000000000000000000000000,-OPX-- */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r2 = OP
							 | 
						||
| 
								 | 
							
									* r3 = src1
							 | 
						||
| 
								 | 
							
									* r5 = src2
							 | 
						||
| 
								 | 
							
									* r4 = OPX (no longer can be muli)
							 | 
						||
| 
								 | 
							
									* r6 = 4*C
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  Multiply or Divide?
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									andi r7, r4, 0x02	/* For R-type multiply instructions,
							 | 
						||
| 
								 | 
							
												   OPX & 0x02 != 0 */
							 | 
						||
| 
								 | 
							
									bne r7, zero, multiply
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* DIVISION
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Divide an unsigned dividend by an unsigned divisor using
							 | 
						||
| 
								 | 
							
									* a shift-and-subtract algorithm.  The example below shows
							 | 
						||
| 
								 | 
							
									* 43 div 7 = 6 for 8-bit integers.  This classic algorithm uses a
							 | 
						||
| 
								 | 
							
									* single register to store both the dividend and the quotient,
							 | 
						||
| 
								 | 
							
									* allowing both values to be shifted with a single instruction.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*                               remainder dividend:quotient
							 | 
						||
| 
								 | 
							
									*                               --------- -----------------
							 | 
						||
| 
								 | 
							
									*   initialize                   00000000     00101011:
							 | 
						||
| 
								 | 
							
									*   shift                        00000000     0101011:_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000000     0101011:0
							 | 
						||
| 
								 | 
							
									*   shift                        00000000     101011:0_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000000     101011:00
							 | 
						||
| 
								 | 
							
									*   shift                        00000001     01011:00_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000001     01011:000
							 | 
						||
| 
								 | 
							
									*   shift                        00000010     1011:000_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000010     1011:0000
							 | 
						||
| 
								 | 
							
									*   shift                        00000101     011:0000_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000101     011:00000
							 | 
						||
| 
								 | 
							
									*   shift                        00001010     11:00000_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? yes    00001010     11:000001
							 | 
						||
| 
								 | 
							
									*       remainder -= divisor   - 00000111
							 | 
						||
| 
								 | 
							
									*                              ----------
							 | 
						||
| 
								 | 
							
									*                                00000011     11:000001
							 | 
						||
| 
								 | 
							
									*   shift                        00000111     1:000001_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? yes    00000111     1:0000011
							 | 
						||
| 
								 | 
							
									*       remainder -= divisor   - 00000111
							 | 
						||
| 
								 | 
							
									*                              ----------
							 | 
						||
| 
								 | 
							
									*                                00000000     1:0000011
							 | 
						||
| 
								 | 
							
									*   shift                        00000001     :0000011_
							 | 
						||
| 
								 | 
							
									*   remainder >= divisor? no     00000001     :00000110
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The quotient is 00000110.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								divide:
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  Prepare for division by assuming the result
							 | 
						||
| 
								 | 
							
									*  is unsigned, and storing its "sign" as 0.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									movi r17, 0
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Which division opcode? */
							 | 
						||
| 
								 | 
							
									xori r7, r4, 0x25		/* OPX of div */
							 | 
						||
| 
								 | 
							
									bne r7, zero, unsigned_division
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  OPX is div.  Determine and store the sign of the quotient.
							 | 
						||
| 
								 | 
							
									*  Then take the absolute value of both operands.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									xor r17, r3, r5		/* MSB contains sign of quotient */
							 | 
						||
| 
								 | 
							
									bge r3,zero,dividend_is_nonnegative
							 | 
						||
| 
								 | 
							
									sub r3, zero, r3	/* -r3 */
							 | 
						||
| 
								 | 
							
								dividend_is_nonnegative:
							 | 
						||
| 
								 | 
							
									bge r5, zero, divisor_is_nonnegative
							 | 
						||
| 
								 | 
							
									sub r5, zero, r5	/* -r5 */
							 | 
						||
| 
								 | 
							
								divisor_is_nonnegative:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								unsigned_division:
							 | 
						||
| 
								 | 
							
									/* Initialize the unsigned-division loop. */
							 | 
						||
| 
								 | 
							
									movi r13, 0	/* remainder = 0 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r3 = dividend : quotient
							 | 
						||
| 
								 | 
							
									* r4 = 0x25 for div, 0x24 for divu
							 | 
						||
| 
								 | 
							
									* r5 = divisor
							 | 
						||
| 
								 | 
							
									* r13 = remainder
							 | 
						||
| 
								 | 
							
									* r14 = loop counter (already initialized to 32)
							 | 
						||
| 
								 | 
							
									* r17 = MSB contains sign of quotient
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*   for (count = 32; count > 0; --count)
							 | 
						||
| 
								 | 
							
									*   {
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								divide_loop:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       Division:
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*       (remainder:dividend:quotient) <<= 1;
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									slli r13, r13, 1
							 | 
						||
| 
								 | 
							
									cmplt r7, r3, zero	/* r7 = MSB of r3 */
							 | 
						||
| 
								 | 
							
									or r13, r13, r7
							 | 
						||
| 
								 | 
							
									slli r3, r3, 1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       if (remainder >= divisor)
							 | 
						||
| 
								 | 
							
									*       {
							 | 
						||
| 
								 | 
							
									*           set LSB of quotient
							 | 
						||
| 
								 | 
							
									*           remainder -= divisor;
							 | 
						||
| 
								 | 
							
									*       }
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									bltu r13, r5, div_skip
							 | 
						||
| 
								 | 
							
									ori r3, r3, 1
							 | 
						||
| 
								 | 
							
									sub r13, r13, r5
							 | 
						||
| 
								 | 
							
								div_skip:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*   }
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									subi r14, r14, 1
							 | 
						||
| 
								 | 
							
									bne r14, zero, divide_loop
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r3 = quotient
							 | 
						||
| 
								 | 
							
									* r4 = 0x25 for div, 0x24 for divu
							 | 
						||
| 
								 | 
							
									* r6 = 4*C
							 | 
						||
| 
								 | 
							
									* r17 = MSB contains sign of quotient
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  Conditionally negate signed quotient.  If quotient is unsigned,
							 | 
						||
| 
								 | 
							
									*  the sign already is initialized to 0.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									bge r17, zero, quotient_is_nonnegative
							 | 
						||
| 
								 | 
							
									sub r3, zero, r3		/* -r3 */
							 | 
						||
| 
								 | 
							
									quotient_is_nonnegative:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  Final quotient is in r3.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									add r6, r6, sp
							 | 
						||
| 
								 | 
							
									stw r3, 0(r6)	/* write quotient to stack */
							 | 
						||
| 
								 | 
							
									br restore_registers
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* MULTIPLICATION
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* A "product" is the number that one gets by summing a "multiplicand"
							 | 
						||
| 
								 | 
							
									* several times.  The "multiplier" specifies the number of copies of the
							 | 
						||
| 
								 | 
							
									* multiplicand that are summed.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* Actual multiplication algorithms don't use repeated addition, however.
							 | 
						||
| 
								 | 
							
									* Shift-and-add algorithms get the same answer as repeated addition, and
							 | 
						||
| 
								 | 
							
									* they are faster.  To compute the lower half of a product (pppp below)
							 | 
						||
| 
								 | 
							
									* one shifts the product left before adding in each of the partial
							 | 
						||
| 
								 | 
							
									* products (a * mmmm) through (d * mmmm).
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* To compute the upper half of a product (PPPP below), one adds in the
							 | 
						||
| 
								 | 
							
									* partial products (d * mmmm) through (a * mmmm), each time following
							 | 
						||
| 
								 | 
							
									* the add by a right shift of the product.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*     mmmm
							 | 
						||
| 
								 | 
							
									*   * abcd
							 | 
						||
| 
								 | 
							
									*   ------
							 | 
						||
| 
								 | 
							
									*     ####  = d * mmmm
							 | 
						||
| 
								 | 
							
									*    ####   = c * mmmm
							 | 
						||
| 
								 | 
							
									*   ####    = b * mmmm
							 | 
						||
| 
								 | 
							
									*  ####     = a * mmmm
							 | 
						||
| 
								 | 
							
									* --------
							 | 
						||
| 
								 | 
							
									* PPPPpppp
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* The example above shows 4 partial products.  Computing actual Nios II
							 | 
						||
| 
								 | 
							
									* products requires 32 partials.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* It is possible to compute the result of mulxsu from the result of
							 | 
						||
| 
								 | 
							
									* mulxuu because the only difference between the results of these two
							 | 
						||
| 
								 | 
							
									* opcodes is the value of the partial product associated with the sign
							 | 
						||
| 
								 | 
							
									* bit of rA.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									* It is possible to compute the result of mulxss from the result of
							 | 
						||
| 
								 | 
							
									* mulxsu because the only difference between the results of these two
							 | 
						||
| 
								 | 
							
									* opcodes is the value of the partial product associated with the sign
							 | 
						||
| 
								 | 
							
									* bit of rB.
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*   mulxss = mulxsu - ((rB < 0) ? rA : 0);
							 | 
						||
| 
								 | 
							
									*
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								mul_immed:
							 | 
						||
| 
								 | 
							
									/* Opcode is muli.  Change it into mul for remainder of algorithm. */
							 | 
						||
| 
								 | 
							
									mov r6, r5		/* Field B is dest register, not field C. */
							 | 
						||
| 
								 | 
							
									mov r5, r4		/* Field IMM16 is src2, not field B. */
							 | 
						||
| 
								 | 
							
									movi r4, 0x27		/* OPX of mul is 0x27 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								multiply:
							 | 
						||
| 
								 | 
							
									/* Initialize the multiplication loop. */
							 | 
						||
| 
								 | 
							
									movi r9, 0	/* mul_product    = 0 */
							 | 
						||
| 
								 | 
							
									movi r10, 0	/* mulxuu_product = 0 */
							 | 
						||
| 
								 | 
							
									mov r11, r5	/* save original multiplier for mulxsu and mulxss */
							 | 
						||
| 
								 | 
							
									mov r12, r5	/* mulxuu_multiplier (will be shifted) */
							 | 
						||
| 
								 | 
							
									movi r16, 1	/* used to create "rori B,A,1" from "ror B,A,r16" */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r3 = multiplicand
							 | 
						||
| 
								 | 
							
									* r5 = mul_multiplier
							 | 
						||
| 
								 | 
							
									* r6 = 4 * dest_register (used later as offset to sp)
							 | 
						||
| 
								 | 
							
									* r7 = temp
							 | 
						||
| 
								 | 
							
									* r9 = mul_product
							 | 
						||
| 
								 | 
							
									* r10 = mulxuu_product
							 | 
						||
| 
								 | 
							
									* r11 = original multiplier
							 | 
						||
| 
								 | 
							
									* r12 = mulxuu_multiplier
							 | 
						||
| 
								 | 
							
									* r14 = loop counter (already initialized)
							 | 
						||
| 
								 | 
							
									* r16 = 1
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*   for (count = 32; count > 0; --count)
							 | 
						||
| 
								 | 
							
									*   {
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								multiply_loop:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       mul_product <<= 1;
							 | 
						||
| 
								 | 
							
									*       lsb = mulxuu_multiplier & 1;
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									slli r9, r9, 1
							 | 
						||
| 
								 | 
							
									andi r7, r12, 1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       if (lsb == 1)
							 | 
						||
| 
								 | 
							
									*       {
							 | 
						||
| 
								 | 
							
									*           mulxuu_product += multiplicand;
							 | 
						||
| 
								 | 
							
									*       }
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									beq r7, zero, mulx_skip
							 | 
						||
| 
								 | 
							
									add r10, r10, r3
							 | 
						||
| 
								 | 
							
									cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
							 | 
						||
| 
								 | 
							
									ror r7, r7, r16	/* r7 = 0x80000000 on carry, or else 0x00000000 */
							 | 
						||
| 
								 | 
							
								mulx_skip:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       if (MSB of mul_multiplier == 1)
							 | 
						||
| 
								 | 
							
									*       {
							 | 
						||
| 
								 | 
							
									*           mul_product += multiplicand;
							 | 
						||
| 
								 | 
							
									*       }
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									bge r5, zero, mul_skip
							 | 
						||
| 
								 | 
							
									add r9, r9, r3
							 | 
						||
| 
								 | 
							
								mul_skip:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*       mulxuu_product >>= 1;           logical shift
							 | 
						||
| 
								 | 
							
									*       mul_multiplier <<= 1;           done with MSB
							 | 
						||
| 
								 | 
							
									*       mulxuu_multiplier >>= 1;        done with LSB
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									srli r10, r10, 1
							 | 
						||
| 
								 | 
							
									or r10, r10, r7		/* OR in the saved carry bit. */
							 | 
						||
| 
								 | 
							
									slli r5, r5, 1
							 | 
						||
| 
								 | 
							
									srli r12, r12, 1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*   }
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
									subi r14, r14, 1
							 | 
						||
| 
								 | 
							
									bne r14, zero, multiply_loop
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									*  Multiply emulation loop done.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Now
							 | 
						||
| 
								 | 
							
									* r3 = multiplicand
							 | 
						||
| 
								 | 
							
									* r4 = OPX
							 | 
						||
| 
								 | 
							
									* r6 = 4 * dest_register (used later as offset to sp)
							 | 
						||
| 
								 | 
							
									* r7 = temp
							 | 
						||
| 
								 | 
							
									* r9 = mul_product
							 | 
						||
| 
								 | 
							
									* r10 = mulxuu_product
							 | 
						||
| 
								 | 
							
									* r11 = original multiplier
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Calculate address for result from 4 * dest_register */
							 | 
						||
| 
								 | 
							
									add r6, r6, sp
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/*
							 | 
						||
| 
								 | 
							
									* Select/compute the result based on OPX.
							 | 
						||
| 
								 | 
							
									*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* OPX == mul?  Then store. */
							 | 
						||
| 
								 | 
							
									xori r7, r4, 0x27
							 | 
						||
| 
								 | 
							
									beq r7, zero, store_product
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* It's one of the mulx.. opcodes.  Move over the result. */
							 | 
						||
| 
								 | 
							
									mov r9, r10
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* OPX == mulxuu?  Then store. */
							 | 
						||
| 
								 | 
							
									xori r7, r4, 0x07
							 | 
						||
| 
								 | 
							
									beq r7, zero, store_product
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Compute mulxsu
							 | 
						||
| 
								 | 
							
									 *
							 | 
						||
| 
								 | 
							
									 * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
							 | 
						||
| 
								 | 
							
									 */
							 | 
						||
| 
								 | 
							
									bge r3, zero, mulxsu_skip
							 | 
						||
| 
								 | 
							
									sub r9, r9, r11
							 | 
						||
| 
								 | 
							
								mulxsu_skip:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* OPX == mulxsu?  Then store. */
							 | 
						||
| 
								 | 
							
									xori r7, r4, 0x17
							 | 
						||
| 
								 | 
							
									beq r7, zero, store_product
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									/* Compute mulxss
							 | 
						||
| 
								 | 
							
									 *
							 | 
						||
| 
								 | 
							
									 * mulxss = mulxsu - ((rB < 0) ? rA : 0);
							 | 
						||
| 
								 | 
							
									 */
							 | 
						||
| 
								 | 
							
									bge r11,zero,mulxss_skip
							 | 
						||
| 
								 | 
							
									sub r9, r9, r3
							 | 
						||
| 
								 | 
							
								mulxss_skip:
							 | 
						||
| 
								 | 
							
									/* At this point, assume that OPX is mulxss, so store. */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								store_product:
							 | 
						||
| 
								 | 
							
									stw r9, 0(r6)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								restore_registers:
							 | 
						||
| 
								 | 
							
											/* No need to restore r0. */
							 | 
						||
| 
								 | 
							
									ldw r5, 100(sp)
							 | 
						||
| 
								 | 
							
									wrctl estatus, r5
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									ldw r1, 4(sp)
							 | 
						||
| 
								 | 
							
									ldw r2, 8(sp)
							 | 
						||
| 
								 | 
							
									ldw r3, 12(sp)
							 | 
						||
| 
								 | 
							
									ldw r4, 16(sp)
							 | 
						||
| 
								 | 
							
									ldw r5, 20(sp)
							 | 
						||
| 
								 | 
							
									ldw r6, 24(sp)
							 | 
						||
| 
								 | 
							
									ldw r7, 28(sp)
							 | 
						||
| 
								 | 
							
									ldw r8, 32(sp)
							 | 
						||
| 
								 | 
							
									ldw r9, 36(sp)
							 | 
						||
| 
								 | 
							
									ldw r10, 40(sp)
							 | 
						||
| 
								 | 
							
									ldw r11, 44(sp)
							 | 
						||
| 
								 | 
							
									ldw r12, 48(sp)
							 | 
						||
| 
								 | 
							
									ldw r13, 52(sp)
							 | 
						||
| 
								 | 
							
									ldw r14, 56(sp)
							 | 
						||
| 
								 | 
							
									ldw r15, 60(sp)
							 | 
						||
| 
								 | 
							
									ldw r16, 64(sp)
							 | 
						||
| 
								 | 
							
									ldw r17, 68(sp)
							 | 
						||
| 
								 | 
							
									ldw r18, 72(sp)
							 | 
						||
| 
								 | 
							
									ldw r19, 76(sp)
							 | 
						||
| 
								 | 
							
									ldw r20, 80(sp)
							 | 
						||
| 
								 | 
							
									ldw r21, 84(sp)
							 | 
						||
| 
								 | 
							
									ldw r22, 88(sp)
							 | 
						||
| 
								 | 
							
									ldw r23, 92(sp)
							 | 
						||
| 
								 | 
							
											/* Does not need to restore et */
							 | 
						||
| 
								 | 
							
									ldw gp, 104(sp)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
									ldw fp, 112(sp)
							 | 
						||
| 
								 | 
							
									ldw ea, 116(sp)
							 | 
						||
| 
								 | 
							
									ldw ra, 120(sp)
							 | 
						||
| 
								 | 
							
									ldw sp, 108(sp)	/* last restore sp */
							 | 
						||
| 
								 | 
							
									eret
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								.set at
							 | 
						||
| 
								 | 
							
								.set break
							 |