crypto: aesni-intel - Add AES-NI accelerated CTR mode

To take advantage of the hardware pipeline implementation of AES-NI
instructions. CTR mode cryption is implemented in ASM to schedule
multiple AES-NI instructions one after another. This way, some latency
of AES-NI instruction can be eliminated.

Performance testing based on dm-crypt should 50% reduction of
ecryption/decryption time.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Huang Ying 2010-03-10 18:28:55 +08:00 committed by Herbert Xu
parent 269ab459da
commit 12387a46bb
2 changed files with 238 additions and 7 deletions

View file

@ -32,6 +32,9 @@
#define IN IN1
#define KEY %xmm2
#define IV %xmm3
#define BSWAP_MASK %xmm10
#define CTR %xmm11
#define INC %xmm12
#define KEYP %rdi
#define OUTP %rsi
@ -42,6 +45,7 @@
#define T1 %r10
#define TKEYP T1
#define T2 %r11
#define TCTR_LOW T2
_key_expansion_128:
_key_expansion_256a:
@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
movups IV, (IVP)
.Lcbc_dec_just_ret:
ret
.align 16
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/*
* _aesni_inc_init: internal ABI
* setup registers used by _aesni_inc
* input:
* IV
* output:
* CTR: == IV, in little endian
* TCTR_LOW: == lower qword of CTR
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
_aesni_inc_init:
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR
mov $1, TCTR_LOW
movq TCTR_LOW, INC
movq CTR, TCTR_LOW
ret
/*
* _aesni_inc: internal ABI
* Increase IV by 1, IV is in big endian
* input:
* IV
* CTR: == IV, in little endian
* TCTR_LOW: == lower qword of CTR
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
* output:
* IV: Increase by 1
* changed:
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
_aesni_inc:
paddq INC, CTR
add $1, TCTR_LOW
jnc .Linc_low
pslldq $8, INC
paddq INC, CTR
psrldq $8, INC
.Linc_low:
movaps CTR, IV
PSHUFB_XMM BSWAP_MASK IV
ret
/*
* void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len, u8 *iv)
*/
ENTRY(aesni_ctr_enc)
cmp $16, LEN
jb .Lctr_enc_just_ret
mov 480(KEYP), KLEN
movups (IVP), IV
call _aesni_inc_init
cmp $64, LEN
jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
movaps IV, STATE1
call _aesni_inc
movups (INP), IN1
movaps IV, STATE2
call _aesni_inc
movups 0x10(INP), IN2
movaps IV, STATE3
call _aesni_inc
movups 0x20(INP), IN3
movaps IV, STATE4
call _aesni_inc
movups 0x30(INP), IN4
call _aesni_enc4
pxor IN1, STATE1
movups STATE1, (OUTP)
pxor IN2, STATE2
movups STATE2, 0x10(OUTP)
pxor IN3, STATE3
movups STATE3, 0x20(OUTP)
pxor IN4, STATE4
movups STATE4, 0x30(OUTP)
sub $64, LEN
add $64, INP
add $64, OUTP
cmp $64, LEN
jge .Lctr_enc_loop4
cmp $16, LEN
jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
movaps IV, STATE
call _aesni_inc
movups (INP), IN
call _aesni_enc1
pxor IN, STATE
movups STATE, (OUTP)
sub $16, LEN
add $16, INP
add $16, OUTP
cmp $16, LEN
jge .Lctr_enc_loop1
.Lctr_enc_ret:
movups IV, (IVP)
.Lctr_enc_just_ret:
ret

View file

@ -18,6 +18,7 @@
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/i387.h>
#include <asm/aes.h>
@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = {
},
};
static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
struct blkcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[AES_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
aesni_enc(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
static int ctr_crypt(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
ctr_crypt_final(ctx, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
kernel_fpu_end();
return err;
}
static struct crypto_alg blk_ctr_alg = {
.cra_name = "__ctr-aes-aesni",
.cra_driver_name = "__driver-ctr-aes-aesni",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
.setkey = aes_set_key,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
};
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = {
},
};
#ifdef HAS_CTR
static int ablk_ctr_init(struct crypto_tfm *tfm)
{
struct cryptd_ablkcipher *cryptd_tfm;
cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))",
0, 0);
cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ablk_init_common(tfm, cryptd_tfm);
@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = {
.ivsize = AES_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
};
#ifdef HAS_CTR
static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
{
struct cryptd_ablkcipher *cryptd_tfm;
cryptd_tfm = cryptd_alloc_ablkcipher(
"rfc3686(__driver-ctr-aes-aesni)", 0, 0);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ablk_init_common(tfm, cryptd_tfm);
return 0;
}
static struct crypto_alg ablk_rfc3686_ctr_alg = {
.cra_name = "rfc3686(ctr(aes))",
.cra_driver_name = "rfc3686-ctr-aes-aesni",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_aes_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
.cra_init = ablk_rfc3686_ctr_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
.max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
.ivsize = CTR_RFC3686_IV_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
.geniv = "seqiv",
},
},
};
#endif
#ifdef HAS_LRW
@ -640,13 +746,17 @@ static int __init aesni_init(void)
goto blk_ecb_err;
if ((err = crypto_register_alg(&blk_cbc_alg)))
goto blk_cbc_err;
if ((err = crypto_register_alg(&blk_ctr_alg)))
goto blk_ctr_err;
if ((err = crypto_register_alg(&ablk_ecb_alg)))
goto ablk_ecb_err;
if ((err = crypto_register_alg(&ablk_cbc_alg)))
goto ablk_cbc_err;
#ifdef HAS_CTR
if ((err = crypto_register_alg(&ablk_ctr_alg)))
goto ablk_ctr_err;
#ifdef HAS_CTR
if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
goto ablk_rfc3686_ctr_err;
#endif
#ifdef HAS_LRW
if ((err = crypto_register_alg(&ablk_lrw_alg)))
@ -675,13 +785,17 @@ ablk_pcbc_err:
ablk_lrw_err:
#endif
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
ablk_rfc3686_ctr_err:
#endif
crypto_unregister_alg(&ablk_ctr_alg);
ablk_ctr_err:
#endif
crypto_unregister_alg(&ablk_cbc_alg);
ablk_cbc_err:
crypto_unregister_alg(&ablk_ecb_alg);
ablk_ecb_err:
crypto_unregister_alg(&blk_ctr_alg);
blk_ctr_err:
crypto_unregister_alg(&blk_cbc_alg);
blk_cbc_err:
crypto_unregister_alg(&blk_ecb_alg);
@ -705,10 +819,12 @@ static void __exit aesni_exit(void)
crypto_unregister_alg(&ablk_lrw_alg);
#endif
#ifdef HAS_CTR
crypto_unregister_alg(&ablk_ctr_alg);
crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
#endif
crypto_unregister_alg(&ablk_ctr_alg);
crypto_unregister_alg(&ablk_cbc_alg);
crypto_unregister_alg(&ablk_ecb_alg);
crypto_unregister_alg(&blk_ctr_alg);
crypto_unregister_alg(&blk_cbc_alg);
crypto_unregister_alg(&blk_ecb_alg);
crypto_unregister_alg(&__aesni_alg);