352 lines
		
	
	
	
		
			7.7 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
		
		
			
		
	
	
			352 lines
		
	
	
	
		
			7.7 KiB
			
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * Fast AES implementation for SPE instruction set (PPC)
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This code makes use of the SPE SIMD instruction set as defined in
							 | 
						||
| 
								 | 
							
								 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
							 | 
						||
| 
								 | 
							
								 * Implementation is based on optimization guide notes from
							 | 
						||
| 
								 | 
							
								 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * This program is free software; you can redistribute it and/or modify it
							 | 
						||
| 
								 | 
							
								 * under the terms of the GNU General Public License as published by the Free
							 | 
						||
| 
								 | 
							
								 * Software Foundation; either version 2 of the License, or (at your option)
							 | 
						||
| 
								 | 
							
								 * any later version.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								#include <asm/ppc_asm.h>
							 | 
						||
| 
								 | 
							
								#include "aes-spe-regs.h"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * EAD: rotate 'in' so that its byte number bpos (0 = least significant
								 * byte) lands in bits 20-27 of rT0 and insert it there, i.e.
								 * rT0 = (rT0 & ~0xff0) | (byte << 4). All other bits of rT0 are kept.
								 * Presumably rT0 holds the base of a table with 16-byte entries that is
								 * aligned so these bits are free - TODO confirm against the caller.
								 */
								#define	EAD(in, bpos) \
							 | 
						||
| 
								 | 
							
									rlwimi		rT0,in,28-((bpos+3)%4)*8,20,27;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * DAD: rotate 'in' so that its byte number bpos (0 = least significant
								 * byte) lands in bits 24-31 of rT1 and insert it there, i.e.
								 * rT1 = (rT1 & ~0xff) | byte. All other bits of rT1 are kept.
								 * Presumably rT1 holds the base of a 256-byte aligned byte table -
								 * TODO confirm against the caller.
								 */
								#define DAD(in, bpos) \
							 | 
						||
| 
								 | 
							
									rlwimi		rT1,in,24-((bpos+3)%4)*8,24,31;
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LWH: load the word at off(rT0) into 'out' with evlwwsplat (the SPE
								 * load-word-and-splat form). It is used to set up the high word of
								 * 'out'; a later LWL on the same register supplies the low word.
								 */
								#define LWH(out, off) \
							 | 
						||
| 
								 | 
							
									evlwwsplat	out,off(rT0);	/* load word high		*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LWL: load the word at off(rT0) into 'out' with a plain lwz.
								 * NOTE(review): per the "load word low" remark this is expected to
								 * replace only the low word and keep a high word set up by LWH -
								 * confirm against the SPE programming manual.
								 */
								#define LWL(out, off) \
							 | 
						||
| 
								 | 
							
									lwz		out,off(rT0);	/* load word low		*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* LBZ: load the zero-extended byte at off(tab) into 'out'. */
								#define LBZ(out, tab, off) \
							 | 
						||
| 
								 | 
							
									lbz		out,off(tab);	/* load byte			*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LAH: update the index bits of rT0 from byte bpos of 'in' (EAD) and
								 * then load the word at off(rT0) into 'out' via LWH.
								 */
								#define LAH(out, in, bpos, off) \
							 | 
						||
| 
								 | 
							
									EAD(in, bpos)			/* calc addr + load word high	*/ \
							 | 
						||
| 
								 | 
							
									LWH(out, off)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LAL: update the index bits of rT0 from byte bpos of 'in' (EAD) and
								 * then load the word at off(rT0) into 'out' via LWL.
								 */
								#define LAL(out, in, bpos, off) \
							 | 
						||
| 
								 | 
							
									EAD(in, bpos)			/* calc addr + load word low	*/ \
							 | 
						||
| 
								 | 
							
									LWL(out, off)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LAE: update the index bits of rT0 from byte bpos of 'in' (EAD) and
								 * then load the single byte at offset 8 from rT0 into 'out'.
								 */
								#define LAE(out, in, bpos) \
							 | 
						||
| 
								 | 
							
									EAD(in, bpos)			/* calc addr + load enc byte	*/ \
							 | 
						||
| 
								 | 
							
									LBZ(out, rT0, 8)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LBE: load the byte at offset 8 from rT0 into 'out' without touching
								 * rT0; the index must already have been set by a preceding EAD.
								 */
								#define LBE(out) \
							 | 
						||
| 
								 | 
							
									LBZ(out, rT0, 8)		/* load enc byte		*/
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LAD: update the index bits of rT1 from byte bpos of 'in' (DAD) and
								 * then load the single byte at 0(rT1) into 'out'.
								 */
								#define LAD(out, in, bpos) \
							 | 
						||
| 
								 | 
							
									DAD(in, bpos)			/* calc addr + load dec byte	*/ \
							 | 
						||
| 
								 | 
							
									LBZ(out, rT1, 0)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * LBD: load the byte at 0(rT1) into 'out' without touching rT1; the
								 * index must already have been set by a preceding DAD.
								 */
								#define LBD(out) \
							 | 
						||
| 
								 | 
							
									LBZ(out, rT1, 0)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * ppc_encrypt_block: The central encryption function for a single 16 bytes
							 | 
						||
| 
								 | 
							
								 * block. It does no stack handling or register saving to support fast calls
							 | 
						||
| 
								 | 
							
								 * via bl/blr. It expects that caller has pre-xored input data with first
							 | 
						||
| 
								 | 
							
								 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
							 | 
						||
| 
								 | 
							
								 * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
							 | 
						||
| 
								 | 
							
								 * and rW0-rW3 and caller must execute a final xor on the output registers.
							 | 
						||
| 
								 | 
							
								 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								_GLOBAL(ppc_encrypt_block)
							 | 
						||
| 
								 | 
							
									/* Prime the high words of rW4, rW6 and rW3. Inside the loop the */
									/* same three lookups are issued at the tail of each round, so   */
									/* every round starts with them already loaded.                  */
									LAH(rW4, rD1, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW6, rD0, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW3, rD0, 1, 8)
							 | 
						||
| 
								 | 
							
								ppc_encrypt_block_loop:
							 | 
						||
| 
								 | 
							
									/* First round of this iteration: table lookups indexed by the   */
									/* bytes of rD0-rD3, combined with key bytes 16..31 of rKP.      */
									LAH(rW0, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD0, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW1, rD1, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD2, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW5, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									/* Fold the lookups and the round key into rD1/rD3. The split    */
									/* EAD + LWL/LWH pairs in between finish this round's remaining  */
									/* lookups and then start the three primed lookups of the next.  */
									evldw		rD1,16(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD3, 3)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW4
							 | 
						||
| 
								 | 
							
									LWL(rW7, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW6
							 | 
						||
| 
								 | 
							
									EAD(rD2, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,24(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									EAD(rD1, 2)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW5
							 | 
						||
| 
								 | 
							
									LWH(rW4, 4)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									EAD(rD0, 3)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LWH(rW6, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW1
							 | 
						||
| 
								 | 
							
									EAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									LWH(rW3, 8)
							 | 
						||
| 
								 | 
							
									/* Second round of this iteration: same sequence as above, but   */
									/* combined with key bytes 32..47 of rKP.                        */
									LAH(rW0, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD0, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW1, rD1, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD2, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW5, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									evldw		rD1,32(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD3, 3)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW4
							 | 
						||
| 
								 | 
							
									LWL(rW7, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW6
							 | 
						||
| 
								 | 
							
									EAD(rD2, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,40(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									EAD(rD1, 2)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW5
							 | 
						||
| 
								 | 
							
									LWH(rW4, 4)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									EAD(rD0, 3)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LWH(rW6, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW1
							 | 
						||
| 
								 | 
							
									EAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									LWH(rW3, 8)
							 | 
						||
| 
								 | 
							
									/* Two round keys (2 * 16 bytes) were consumed: step rKP past    */
									/* them and repeat while CTR, decremented by bdnz, is non-zero.  */
									addi		rKP,rKP,32
							 | 
						||
| 
								 | 
							
									bdnz		ppc_encrypt_block_loop
							 | 
						||
| 
								 | 
							
									/* Round after the loop: the same table lookups as a loop round, */
									/* combined with key bytes 16..31 of the advanced rKP.           */
									LAH(rW0, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD0, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW1, rD1, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD2, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW5, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									evldw		rD1,16(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD3, 3)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW4
							 | 
						||
| 
								 | 
							
									LWL(rW7, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW2,rW2,rW6
							 | 
						||
| 
								 | 
							
									EAD(rD2, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,24(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									/* Unlike the loop rounds, the tail of this round does not prime */
									/* further word lookups: it already fetches single bytes (offset */
									/* 8 from rT0, via LBE) that feed the last round below.          */
									EAD(rD1, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW5
							 | 
						||
| 
								 | 
							
									LBE(rW2)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									EAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LBE(rW6)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW1
							 | 
						||
| 
								 | 
							
									EAD(rD0, 0)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									/* NOTE(review): rW1 is loaded here and again by the             */
									/* LAE(rW1, rD0, 0) two lines below with the same rD0 byte index */
									/* - this first load looks redundant; confirm before changing.   */
									LBE(rW1)
							 | 
						||
| 
								 | 
							
									/* Last round: one byte per lookup. Four bytes are merged into   */
									/* each of rW0-rW3 with rlwimi (byte positions 8-15, 16-23 and   */
									/* 24-31 counted from the least significant bit), interleaved    */
									/* with reloading rD0-rD3 from the four key words at             */
									/* 32..44(rKP). The caller xors rW0-rW3 with rD0-rD3.            */
									LAE(rW0, rD3, 0)
							 | 
						||
| 
								 | 
							
									LAE(rW1, rD0, 0)
							 | 
						||
| 
								 | 
							
									LAE(rW4, rD2, 1)
							 | 
						||
| 
								 | 
							
									LAE(rW5, rD3, 1)
							 | 
						||
| 
								 | 
							
									LAE(rW3, rD2, 0)
							 | 
						||
| 
								 | 
							
									LAE(rW7, rD1, 1)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,8,16,23
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,8,16,23
							 | 
						||
| 
								 | 
							
									LAE(rW4, rD1, 2)
							 | 
						||
| 
								 | 
							
									LAE(rW5, rD2, 2)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,8,16,23
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,8,16,23
							 | 
						||
| 
								 | 
							
									LAE(rW6, rD3, 2)
							 | 
						||
| 
								 | 
							
									LAE(rW7, rD0, 2)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,16,8,15
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,16,8,15
							 | 
						||
| 
								 | 
							
									LAE(rW4, rD0, 3)
							 | 
						||
| 
								 | 
							
									LAE(rW5, rD1, 3)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,16,8,15
							 | 
						||
| 
								 | 
							
									lwz		rD0,32(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,16,8,15
							 | 
						||
| 
								 | 
							
									lwz		rD1,36(rKP)
							 | 
						||
| 
								 | 
							
									LAE(rW6, rD2, 3)
							 | 
						||
| 
								 | 
							
									LAE(rW7, rD3, 3)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,24,0,7
							 | 
						||
| 
								 | 
							
									lwz		rD2,40(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,24,0,7
							 | 
						||
| 
								 | 
							
									lwz		rD3,44(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,24,0,7
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,24,0,7
							 | 
						||
| 
								 | 
							
									blr
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
							 | 
						||
| 
								 | 
							
								 * ppc_decrypt_block: The central decryption function for a single 16 bytes
							 | 
						||
| 
								 | 
							
								 * block. It does no stack handling or register saving to support fast calls
							 | 
						||
| 
								 | 
							
								 * via bl/blr. It expects that caller has pre-xored input data with first
							 | 
						||
| 
								 | 
							
								 * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
							 | 
						||
| 
								 | 
							
								 * have also been set up before (rT0, rT1, rKP, CTR). Output is stored in rD0-rD3
							 | 
						||
| 
								 | 
							
								 * and rW0-rW3 and caller must execute a final xor on the output registers.
							 | 
						||
| 
								 | 
							
								 * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								_GLOBAL(ppc_decrypt_block)
							 | 
						||
| 
								 | 
							
									/* Prime the high words of rW0, rW6 and rW3. Inside the loop the */
									/* same three lookups are issued at the tail of each round, so   */
									/* every round starts with them already loaded.                  */
									LAH(rW0, rD1, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW6, rD0, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW3, rD0, 1, 8)
							 | 
						||
| 
								 | 
							
								ppc_decrypt_block_loop:
							 | 
						||
| 
								 | 
							
									/* First round of this iteration: table lookups indexed by the   */
									/* bytes of rD0-rD3, combined with key bytes 16..31 of rKP.      */
									LAH(rW1, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD2, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAH(rW4, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD1, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW7, rD3, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									/* Fold the lookups and the round key into rD1/rD3. The split    */
									/* EAD + LWL/LWH pairs in between finish this round's remaining  */
									/* lookups and then start the three primed lookups of the next.  */
									evldw		rD1,16(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD0, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW4,rW4,rW6
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW4
							 | 
						||
| 
								 | 
							
									EAD(rD2, 2)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW5, 4)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,24(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									EAD(rD1, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									LWH(rW0, 12)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW1
							 | 
						||
| 
								 | 
							
									EAD(rD0, 3)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LWH(rW6, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW5
							 | 
						||
| 
								 | 
							
									EAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									LWH(rW3, 8)
							 | 
						||
| 
								 | 
							
									/* Second round of this iteration: same sequence as above, but   */
									/* combined with key bytes 32..47 of rKP.                        */
									LAH(rW1, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD2, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAH(rW4, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD1, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW7, rD3, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									evldw		 rD1,32(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD0, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW4,rW4,rW6
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW4
							 | 
						||
| 
								 | 
							
									EAD(rD2, 2)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW5, 4)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,40(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									EAD(rD1, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									LWH(rW0, 12)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW1
							 | 
						||
| 
								 | 
							
									EAD(rD0, 3)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LWH(rW6, 0)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW5
							 | 
						||
| 
								 | 
							
									EAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									LWH(rW3, 8)
							 | 
						||
| 
								 | 
							
									/* Two round keys (2 * 16 bytes) were consumed: step rKP past    */
									/* them and repeat while CTR, decremented by bdnz, is non-zero.  */
									addi		rKP,rKP,32
							 | 
						||
| 
								 | 
							
									bdnz		ppc_decrypt_block_loop
							 | 
						||
| 
								 | 
							
									/* Round after the loop: the same table lookups as a loop round, */
									/* combined with key bytes 16..31 of the advanced rKP.           */
									LAH(rW1, rD3, 0, 12)
							 | 
						||
| 
								 | 
							
									LAL(rW0, rD2, 0, 12)
							 | 
						||
| 
								 | 
							
									LAH(rW2, rD2, 1, 8)
							 | 
						||
| 
								 | 
							
									LAL(rW2, rD3, 1, 8)
							 | 
						||
| 
								 | 
							
									LAH(rW4, rD3, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW4, rD0, 2, 4)
							 | 
						||
| 
								 | 
							
									LAL(rW6, rD1, 3, 0)
							 | 
						||
| 
								 | 
							
									LAH(rW5, rD1, 2, 4)
							 | 
						||
| 
								 | 
							
									LAH(rW7, rD2, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW7, rD3, 3, 0)
							 | 
						||
| 
								 | 
							
									LAL(rW3, rD1, 1, 8)
							 | 
						||
| 
								 | 
							
									evldw		 rD1,16(rKP)
							 | 
						||
| 
								 | 
							
									EAD(rD0, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW4,rW4,rW6
							 | 
						||
| 
								 | 
							
									LWL(rW1, 12)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW4
							 | 
						||
| 
								 | 
							
									EAD(rD2, 2)
							 | 
						||
| 
								 | 
							
									evxor		rW0,rW0,rW2
							 | 
						||
| 
								 | 
							
									LWL(rW5, 4)
							 | 
						||
| 
								 | 
							
									evxor		rD1,rD1,rW0
							 | 
						||
| 
								 | 
							
									evldw		rD3,24(rKP)
							 | 
						||
| 
								 | 
							
									evmergehi	rD0,rD0,rD1
							 | 
						||
| 
								 | 
							
									/* Unlike the loop rounds, the tail of this round does not prime */
									/* further word lookups: it switches to byte lookups through rT1 */
									/* (DAD/LBD) that feed the last round below. rT1 is only indexed */
									/* here, so its base must have been set up by the caller.        */
									DAD(rD1, 0)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW7
							 | 
						||
| 
								 | 
							
									LBD(rW0)
							 | 
						||
| 
								 | 
							
									evxor		rW3,rW3,rW1
							 | 
						||
| 
								 | 
							
									DAD(rD0, 1)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW3
							 | 
						||
| 
								 | 
							
									LBD(rW6)
							 | 
						||
| 
								 | 
							
									evxor		rD3,rD3,rW5
							 | 
						||
| 
								 | 
							
									DAD(rD0, 0)
							 | 
						||
| 
								 | 
							
									evmergehi	rD2,rD2,rD3
							 | 
						||
| 
								 | 
							
									LBD(rW3)
							 | 
						||
| 
								 | 
							
									/* Last round: one byte per lookup. Four bytes are merged into   */
									/* each of rW0-rW3 with rlwimi (byte positions 8-15, 16-23 and   */
									/* 24-31 counted from the least significant bit), interleaved    */
									/* with reloading rD0-rD3 from the four key words at             */
									/* 32..44(rKP). The caller xors rW0-rW3 with rD0-rD3.            */
									LAD(rW2, rD3, 0)
							 | 
						||
| 
								 | 
							
									LAD(rW1, rD2, 0)
							 | 
						||
| 
								 | 
							
									LAD(rW4, rD2, 1)
							 | 
						||
| 
								 | 
							
									LAD(rW5, rD3, 1)
							 | 
						||
| 
								 | 
							
									LAD(rW7, rD1, 1)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,8,16,23
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,8,16,23
							 | 
						||
| 
								 | 
							
									LAD(rW4, rD3, 2)
							 | 
						||
| 
								 | 
							
									LAD(rW5, rD0, 2)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,8,16,23
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,8,16,23
							 | 
						||
| 
								 | 
							
									LAD(rW6, rD1, 2)
							 | 
						||
| 
								 | 
							
									LAD(rW7, rD2, 2)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,16,8,15
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,16,8,15
							 | 
						||
| 
								 | 
							
									LAD(rW4, rD0, 3)
							 | 
						||
| 
								 | 
							
									LAD(rW5, rD1, 3)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,16,8,15
							 | 
						||
| 
								 | 
							
									lwz		rD0,32(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,16,8,15
							 | 
						||
| 
								 | 
							
									lwz		rD1,36(rKP)
							 | 
						||
| 
								 | 
							
									LAD(rW6, rD2, 3)
							 | 
						||
| 
								 | 
							
									LAD(rW7, rD3, 3)
							 | 
						||
| 
								 | 
							
									rlwimi		rW0,rW4,24,0,7
							 | 
						||
| 
								 | 
							
									lwz		rD2,40(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW1,rW5,24,0,7
							 | 
						||
| 
								 | 
							
									lwz		rD3,44(rKP)
							 | 
						||
| 
								 | 
							
									rlwimi		rW2,rW6,24,0,7
							 | 
						||
| 
								 | 
							
									rlwimi		rW3,rW7,24,0,7
							 | 
						||
| 
								 | 
							
									blr
							 |