This patch adds AVX-optimized XTS-mode helper functions/macros and converts
serpent-avx to use the new facilities. The benefits are slightly improved speed
and reduced stack usage, since the temporary IV array is no longer needed.

tcrypt results with Intel i5-2450M:
        enc     dec
16B     1.00x   1.00x
64B     1.00x   1.00x
256B    1.04x   1.06x
1024B   1.09x   1.09x
8192B   1.10x   1.09x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
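For reference, here is a minimal sketch of how a cipher module such as serpent-avx can route its XTS path through the new helper. The serpent-side identifiers (SERPENT_PARALLEL_BLOCKS, serpent_xts_enc_8way_avx, serpent_xts_enc, struct serpent_xts_ctx) and the GLUE_XTS_FUNC_CAST/XTS_TWEAK_CAST macros are assumptions used only for illustration; glue_xts_crypt_128bit and struct common_glue_ctx are the facilities defined in the glue code below.

/* Illustrative sketch only -- serpent-specific names are assumed. */
static const struct common_glue_ctx serpent_enc_xts = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		/* widest batch first: several blocks per call under AVX */
		.num_blocks = SERPENT_PARALLEL_BLOCKS,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
	}, {
		/* single-block fallback for the tail */
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
	} }
};

static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	/* The first tweak is computed once with the tweak key; the per-block
	 * functions then advance it in place, so no temporary IV array is
	 * kept on the stack. */
	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(__serpent_encrypt),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}

The shared glue code (arch/x86/crypto/glue_helper.c) follows.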
/*
 * Shared glue code for 128bit block ciphers
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
 *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
 * CTR part based on code (crypto/ctr.c) by:
 *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>

static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
				   struct blkcipher_desc *desc,
				   struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes, i, func_bytes;
	bool fpu_enabled = false;
	int err;

	err = blkcipher_walk_virt(desc, walk);

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
					     desc, fpu_enabled, nbytes);

		for (i = 0; i < gctx->num_funcs; i++) {
			func_bytes = bsize * gctx->funcs[i].num_blocks;

			/* Process multi-block batch */
			if (nbytes >= func_bytes) {
				do {
					gctx->funcs[i].fn_u.ecb(ctx, wdst,
								wsrc);

					wsrc += func_bytes;
					wdst += func_bytes;
					nbytes -= func_bytes;
				} while (nbytes >= func_bytes);

				if (nbytes < bsize)
					goto done;
			}
		}

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	glue_fpu_end(fpu_enabled);
	return err;
}

int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
			  struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
}
EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);

static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
					      struct blkcipher_desc *desc,
					      struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 *iv = (u128 *)walk->iv;

	do {
		u128_xor(dst, src, iv);
		fn(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u128 *)walk->iv = *iv;
	return nbytes;
}

int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
			    struct blkcipher_desc *desc,
			    struct scatterlist *dst,
			    struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);

static unsigned int
__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
			  struct blkcipher_desc *desc,
			  struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 last_iv;
	unsigned int num_blocks, func_bytes;
	unsigned int i;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	for (i = 0; i < gctx->num_funcs; i++) {
		num_blocks = gctx->funcs[i].num_blocks;
		func_bytes = bsize * num_blocks;

		/* Process multi-block batch */
		if (nbytes >= func_bytes) {
			do {
				nbytes -= func_bytes - bsize;
				src -= num_blocks - 1;
				dst -= num_blocks - 1;

				gctx->funcs[i].fn_u.cbc(ctx, dst, src);

				nbytes -= bsize;
				if (nbytes < bsize)
					goto done;

				u128_xor(dst, dst, src - 1);
				src -= 1;
				dst -= 1;
			} while (nbytes >= func_bytes);

			if (nbytes < bsize)
				goto done;
		}
	}

done:
	u128_xor(dst, dst, (u128 *)walk->iv);
	*(u128 *)walk->iv = last_iv;

	return nbytes;
}

int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
			    struct blkcipher_desc *desc,
			    struct scatterlist *dst,
			    struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int bsize = 128 / 8;
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
					     desc, fpu_enabled, nbytes);
		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	glue_fpu_end(fpu_enabled);
	return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);

static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
					struct blkcipher_desc *desc,
					struct blkcipher_walk *walk)
{
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *src = (u8 *)walk->src.virt.addr;
	u8 *dst = (u8 *)walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;
	le128 ctrblk;
	u128 tmp;

	be128_to_le128(&ctrblk, (be128 *)walk->iv);

	memcpy(&tmp, src, nbytes);
	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
	memcpy(dst, &tmp, nbytes);

	le128_to_be128((be128 *)walk->iv, &ctrblk);
}

static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
					    struct blkcipher_desc *desc,
					    struct blkcipher_walk *walk)
{
	const unsigned int bsize = 128 / 8;
	void *ctx = crypto_blkcipher_ctx(desc->tfm);
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	le128 ctrblk;
	unsigned int num_blocks, func_bytes;
	unsigned int i;

	be128_to_le128(&ctrblk, (be128 *)walk->iv);

	/* Process multi-block batch */
	for (i = 0; i < gctx->num_funcs; i++) {
		num_blocks = gctx->funcs[i].num_blocks;
		func_bytes = bsize * num_blocks;

		if (nbytes >= func_bytes) {
			do {
				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);

				src += num_blocks;
				dst += num_blocks;
				nbytes -= func_bytes;
			} while (nbytes >= func_bytes);

			if (nbytes < bsize)
				goto done;
		}
	}

done:
	le128_to_be128((be128 *)walk->iv, &ctrblk);
	return nbytes;
}

int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
			  struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	const unsigned int bsize = 128 / 8;
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, bsize);

	while ((nbytes = walk.nbytes) >= bsize) {
		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
					     desc, fpu_enabled, nbytes);
		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	glue_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		glue_ctr_crypt_final_128bit(
			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);

static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
					    void *ctx,
					    struct blkcipher_desc *desc,
					    struct blkcipher_walk *walk)
{
	const unsigned int bsize = 128 / 8;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	unsigned int num_blocks, func_bytes;
	unsigned int i;

	/* Process multi-block batch */
	for (i = 0; i < gctx->num_funcs; i++) {
		num_blocks = gctx->funcs[i].num_blocks;
		func_bytes = bsize * num_blocks;

		if (nbytes >= func_bytes) {
			do {
				gctx->funcs[i].fn_u.xts(ctx, dst, src,
							(le128 *)walk->iv);

				src += num_blocks;
				dst += num_blocks;
				nbytes -= func_bytes;
			} while (nbytes >= func_bytes);

			if (nbytes < bsize)
				goto done;
		}
	}

done:
	return nbytes;
}

/* for implementations implementing faster XTS IV generator */
int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
			  struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes,
			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
			  void *tweak_ctx, void *crypt_ctx)
{
	const unsigned int bsize = 128 / 8;
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);

	err = blkcipher_walk_virt(desc, &walk);
	nbytes = walk.nbytes;
	if (!nbytes)
		return err;

	/* set minimum length to bsize, for tweak_fn */
	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
				     desc, fpu_enabled,
				     nbytes < bsize ? bsize : nbytes);

	/* calculate first value of T */
	tweak_fn(tweak_ctx, walk.iv, walk.iv);

	while (nbytes) {
		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);

		err = blkcipher_walk_done(desc, &walk, nbytes);
		nbytes = walk.nbytes;
	}

	glue_fpu_end(fpu_enabled);

	return err;
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);

void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
			       common_glue_func_t fn)
{
	le128 ivblk = *iv;

	/* generate next IV */
	le128_gf128mul_x_ble(iv, &ivblk);

	/* CC <- T xor C */
	u128_xor(dst, src, (u128 *)&ivblk);

	/* PP <- D(Key2,CC) */
	fn(ctx, (u8 *)dst, (u8 *)dst);

	/* P <- T xor PP */
	u128_xor(dst, dst, (u128 *)&ivblk);
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);

MODULE_LICENSE("GPL");
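The single-block XTS handlers that a cipher lists in its common_glue_ctx are in turn expected to be thin wrappers around glue_xts_crypt_128bit_one(); a minimal sketch, again with assumed serpent-side names (__serpent_encrypt, __serpent_decrypt, GLUE_FUNC_CAST):

static void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	/* Process one block with the current tweak; the helper also steps
	 * *iv to the tweak for the next block. */
	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
				  GLUE_FUNC_CAST(__serpent_encrypt));
}

static void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
				  GLUE_FUNC_CAST(__serpent_decrypt));
}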