From: Ard Biesheuvel <ard.biesheuvel@linaro.org> To: herbert@gondor.apana.org.au, jussi.kivilinna@iki.fi, linux-crypto@vger.kernel.org, linux@arm.linux.org.uk, linux-arm-kernel@lists.infradead.org Cc: nico@linaro.org, will.deacon@arm.com, Ard Biesheuvel <ard.biesheuvel@linaro.org> Subject: [PATCH v2 4/5] crypto/arm: AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions Date: Tue, 10 Mar 2015 09:47:47 +0100 [thread overview] Message-ID: <1425977268-27835-5-git-send-email-ard.biesheuvel@linaro.org> (raw) In-Reply-To: <1425977268-27835-1-git-send-email-ard.biesheuvel@linaro.org> This implements the ECB, CBC, CTR and XTS asynchronous block ciphers using the AArch32 versions of the ARMv8 Crypto Extensions for AES. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- arch/arm/crypto/Kconfig | 9 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/aes-ce-core.S | 518 +++++++++++++++++++++++++++++++++++++++++ arch/arm/crypto/aes-ce-glue.c | 520 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1049 insertions(+) create mode 100644 arch/arm/crypto/aes-ce-core.S create mode 100644 arch/arm/crypto/aes-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 9c1478e55a40..63588bdf3b5d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -101,4 +101,13 @@ config CRYPTO_AES_ARM_BS This implementation does not rely on any lookup tables so it is believed to be invulnerable to cache timing attacks. +config CRYPTO_AES_ARM_CE + tristate "Accelerated AES using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_ABLK_HELPER + help + Use an implementation of AES in CBC, CTR and XTS modes that uses + ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4ea9f96c2782..2514c420e8d3 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o @@ -17,6 +18,7 @@ sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o +aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index 000000000000..8cfa468ee570 --- /dev/null +++ b/arch/arm/crypto/aes-ce-core.S @@ -0,0 +1,518 @@ +/* + * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu crypto-neon-fp-armv8 + .align 3 + + .macro enc_round, state, key + aese.8 \state, \key + aesmc.8 \state, \state + .endm + + .macro dec_round, state, key + aesd.8 \state, \key + aesimc.8 \state, \state + .endm + + .macro enc_dround, key1, key2 + enc_round q0, \key1 + enc_round q0, \key2 + .endm + + .macro dec_dround, key1, key2 + dec_round q0, \key1 + dec_round q0, \key2 + .endm + + .macro enc_fround, key1, key2, key3 + enc_round q0, \key1 + aese.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro dec_fround, key1, key2, key3 + dec_round q0, \key1 + aesd.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro enc_dround_3x, key1, key2 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + enc_round q0, \key2 + enc_round q1, \key2 + enc_round q2, \key2 + .endm + + .macro dec_dround_3x, key1, key2 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + dec_round q0, \key2 + dec_round q1, \key2 + dec_round q2, \key2 + .endm + + .macro enc_fround_3x, key1, key2, key3 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + aese.8 q0, \key2 + aese.8 q1, \key2 + aese.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro dec_fround_3x, key1, key2, key3 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + aesd.8 q0, \key2 + aesd.8 q1, \key2 + aesd.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro do_block, dround, fround + cmp r3, #12 @ which key size? + vld1.8 {q10-q11}, [ip]! + \dround q8, q9 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + vld1.8 {q10-q11}, [ip]! + \dround q12, q13 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + blo 0f @ AES-128: 10 rounds + vld1.8 {q10-q11}, [ip]! + beq 1f @ AES-192: 12 rounds + \dround q12, q13 + vld1.8 {q12-q13}, [ip] + \dround q10, q11 +0: \fround q12, q13, q14 + bx lr + +1: \dround q12, q13 + \fround q10, q11, q14 + bx lr + .endm + + /* + * Internal, non-AAPCS compliant functions that implement the core AES + * transforms. These should preserve all registers except q0 - q2 and ip + * Arguments: + * q0 : first in/output block + * q1 : second in/output block (_3x version only) + * q2 : third in/output block (_3x version only) + * q8 : first round key + * q9 : secound round key + * ip : address of 3rd round key + * q14 : final round key + * r3 : number of rounds + */ + .align 6 +aes_encrypt: + add ip, r2, #32 @ 3rd round key +.Laes_encrypt_tweak: + do_block enc_dround, enc_fround +ENDPROC(aes_encrypt) + + .align 6 +aes_decrypt: + add ip, r2, #32 @ 3rd round key + do_block dec_dround, dec_fround +ENDPROC(aes_decrypt) + + .align 6 +aes_encrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block enc_dround_3x, enc_fround_3x +ENDPROC(aes_encrypt_3x) + + .align 6 +aes_decrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block dec_dround_3x, dec_fround_3x +ENDPROC(aes_decrypt_3x) + + .macro prepare_key, rk, rounds + add ip, \rk, \rounds, lsl #4 + vld1.8 {q8-q9}, [\rk] @ load first 2 round keys + vld1.8 {q14}, [ip] @ load last round key + .endm + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ +ENTRY(ce_aes_ecb_encrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbencloop3x: + subs r4, r4, #3 + bmi .Lecbenc1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_encrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbencloop3x +.Lecbenc1x: + adds r4, r4, #3 + beq .Lecbencout +.Lecbencloop: + vld1.8 {q0}, [r1, :64]! + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbencloop +.Lecbencout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_encrypt) + +ENTRY(ce_aes_ecb_decrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbdecloop3x: + subs r4, r4, #3 + bmi .Lecbdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_decrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbdecloop3x +.Lecbdec1x: + adds r4, r4, #3 + beq .Lecbdecout +.Lecbdecloop: + vld1.8 {q0}, [r1, :64]! + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbdecloop +.Lecbdecout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_decrypt) + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +ENTRY(ce_aes_cbc_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q0}, [r5] + prepare_key r2, r3 +.Lcbcencloop: + vld1.8 {q1}, [r1, :64]! @ get next pt block + veor q0, q0, q1 @ ..and xor with iv + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcencloop + vst1.8 {q0}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_encrypt) + +ENTRY(ce_aes_cbc_decrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ keep iv in q6 + prepare_key r2, r3 +.Lcbcdecloop3x: + subs r4, r4, #3 + bmi .Lcbcdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + vmov q3, q0 + vmov q4, q1 + vmov q5, q2 + bl aes_decrypt_3x + veor q0, q0, q6 + veor q1, q1, q3 + veor q2, q2, q4 + vmov q6, q5 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lcbcdecloop3x +.Lcbcdec1x: + adds r4, r4, #3 + beq .Lcbcdecout + vmov q15, q14 @ preserve last round key +.Lcbcdecloop: + vld1.8 {q0}, [r1, :64]! @ get next ct block + veor q14, q15, q6 @ combine prev ct with last key + vmov q6, q0 + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + vst1.8 {q6}, [r5] @ keep iv in q6 + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_decrypt) + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[]) + */ +ENTRY(ce_aes_ctr_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ load ctr + prepare_key r2, r3 + vmov r6, s27 @ keep swabbed ctr in r6 + rev r6, r6 + cmn r6, r4 @ 32 bit overflow? + bcs .Lctrloop +.Lctrloop3x: + subs r4, r4, #3 + bmi .Lctr1x + add r6, r6, #1 + vmov q0, q6 + vmov q1, q6 + rev ip, r6 + add r6, r6, #1 + vmov q2, q6 + vmov s7, ip + rev ip, r6 + add r6, r6, #1 + vmov s11, ip + vld1.8 {q3-q4}, [r1, :64]! + vld1.8 {q5}, [r1, :64]! + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + rev ip, r6 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + vmov s27, ip + b .Lctrloop3x +.Lctr1x: + adds r4, r4, #3 + beq .Lctrout +.Lctrloop: + vmov q0, q6 + bl aes_encrypt + subs r4, r4, #1 + bmi .Lctrhalfblock @ blocks < 0 means 1/2 block + vld1.8 {q3}, [r1, :64]! + veor q3, q0, q3 + vst1.8 {q3}, [r0, :64]! + + adds r6, r6, #1 @ increment BE ctr + rev ip, r6 + vmov s27, ip + bcs .Lctrcarry + teq r4, #0 + bne .Lctrloop +.Lctrout: + vst1.8 {q6}, [r5] + pop {r4-r6, pc} + +.Lctrhalfblock: + vld1.8 {d1}, [r1, :64] + veor d0, d0, d1 + vst1.8 {d0}, [r0, :64] + pop {r4-r6, pc} + +.Lctrcarry: + .irp sreg, s26, s25, s24 + vmov ip, \sreg @ load next word of ctr + rev ip, ip @ ... to handle the carry + adds ip, ip, #1 + rev ip, ip + vmov \sreg, ip + bcc 0f + .endr +0: teq r4, #0 + beq .Lctrout + b .Lctrloop +ENDPROC(ce_aes_ctr_encrypt) + + /* + * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + */ + + .macro next_tweak, out, in, const, tmp + vshr.s64 \tmp, \in, #63 + vand \tmp, \tmp, \const + vadd.u64 \out, \in, \in + vext.8 \tmp, \tmp, \tmp, #8 + veor \out, \out, \tmp + .endm + + .align 3 +.Lxts_mul_x: + .quad 1, 0x87 + +ce_aes_xts_init: + vldr d14, .Lxts_mul_x + vldr d15, .Lxts_mul_x + 8 + + ldrd r4, r5, [sp, #16] @ load args + ldr r6, [sp, #28] + vld1.8 {q0}, [r5] @ load iv + teq r6, #1 @ start of a block? + bxne lr + + @ Encrypt the IV in q0 with the second AES key. This should only + @ be done at the start of a block. + ldr r6, [sp, #24] @ load AES key 2 + prepare_key r6, r3 + add ip, r6, #32 @ 3rd round key of key 2 + b .Laes_encrypt_tweak @ tail call +ENDPROC(ce_aes_xts_init) + +ENTRY(ce_aes_xts_encrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsenc3x + +.Lxtsencloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsenc3x: + subs r4, r4, #3 + bmi .Lxtsenc1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsencout + b .Lxtsencloop3x +.Lxtsenc1x: + adds r4, r4, #3 + beq .Lxtsencout +.Lxtsencloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + bl aes_encrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsencout + next_tweak q3, q3, q7, q6 + b .Lxtsencloop +.Lxtsencout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_encrypt) + + +ENTRY(ce_aes_xts_decrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsdec3x + +.Lxtsdecloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsdec3x: + subs r4, r4, #3 + bmi .Lxtsdec1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_decrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsdecout + b .Lxtsdecloop3x +.Lxtsdec1x: + adds r4, r4, #3 + beq .Lxtsdecout +.Lxtsdecloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + add ip, r2, #32 @ 3rd round key + bl aes_decrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsdecout + next_tweak q3, q3, q7, q6 + b .Lxtsdecloop +.Lxtsdecout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_decrypt) + + /* + * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the + * AES sbox substitution on each byte in + * 'input' + */ +ENTRY(ce_aes_sub) + vdup.32 q1, r0 + veor q0, q0, q0 + aese.8 q0, q1 + vmov r0, s0 + bx lr +ENDPROC(ce_aes_sub) + + /* + * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns + * operation on round key *src + */ +ENTRY(ce_aes_invert) + vld1.8 {q0}, [r1] + aesimc.8 q0, q0 + vst1.8 {q0}, [r0] + bx lr +ENDPROC(ce_aes_invert) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c new file mode 100644 index 000000000000..d2ee59157ec7 --- /dev/null +++ b/arch/arm/crypto/aes-ce-glue.c @@ -0,0 +1,520 @@ +/* + * aes-ce-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/hwcap.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-ce-core.S */ +asmlinkage u32 ce_aes_sub(u32 input); +asmlinkage void ce_aes_invert(void *dst, void *src); + +asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[]); + +asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); +asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + /* + * The AES key schedule round constants + */ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx->key_enc, in_key, key_len); + ctx->key_length = key_len; + + kernel_neon_begin(); + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ rcon[i]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = ce_aes_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } + } + + /* + * Generate the decryption keys for the Equivalent Inverse Cipher. + * This involves reversing the order of the round keys, and applying + * the Inverse Mix Columns transformation on all but the first and + * the last one. + */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + ce_aes_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + + kernel_neon_end(); + return 0; +} + +static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, + num_rounds(ctx), blocks, walk.iv); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-ce", + .cra_driver_name = "__driver-ecb-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-ce", + .cra_driver_name = "__driver-cbc-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-ce", + .cra_driver_name = "__driver-ctr-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-ce", + .cra_driver_name = "__driver-xts-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_AES)) + return -ENODEV; + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +module_init(aes_init); +module_exit(aes_exit); -- 1.8.3.2
WARNING: multiple messages have this Message-ID (diff)
From: ard.biesheuvel@linaro.org (Ard Biesheuvel) To: linux-arm-kernel@lists.infradead.org Subject: [PATCH v2 4/5] crypto/arm: AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions Date: Tue, 10 Mar 2015 09:47:47 +0100 [thread overview] Message-ID: <1425977268-27835-5-git-send-email-ard.biesheuvel@linaro.org> (raw) In-Reply-To: <1425977268-27835-1-git-send-email-ard.biesheuvel@linaro.org> This implements the ECB, CBC, CTR and XTS asynchronous block ciphers using the AArch32 versions of the ARMv8 Crypto Extensions for AES. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> --- arch/arm/crypto/Kconfig | 9 + arch/arm/crypto/Makefile | 2 + arch/arm/crypto/aes-ce-core.S | 518 +++++++++++++++++++++++++++++++++++++++++ arch/arm/crypto/aes-ce-glue.c | 520 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1049 insertions(+) create mode 100644 arch/arm/crypto/aes-ce-core.S create mode 100644 arch/arm/crypto/aes-ce-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 9c1478e55a40..63588bdf3b5d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -101,4 +101,13 @@ config CRYPTO_AES_ARM_BS This implementation does not rely on any lookup tables so it is believed to be invulnerable to cache timing attacks. +config CRYPTO_AES_ARM_CE + tristate "Accelerated AES using ARMv8 Crypto Extensions" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_ABLK_HELPER + help + Use an implementation of AES in CBC, CTR and XTS modes that uses + ARMv8 Crypto Extensions + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4ea9f96c2782..2514c420e8d3 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o @@ -17,6 +18,7 @@ sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o +aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S new file mode 100644 index 000000000000..8cfa468ee570 --- /dev/null +++ b/arch/arm/crypto/aes-ce-core.S @@ -0,0 +1,518 @@ +/* + * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .fpu crypto-neon-fp-armv8 + .align 3 + + .macro enc_round, state, key + aese.8 \state, \key + aesmc.8 \state, \state + .endm + + .macro dec_round, state, key + aesd.8 \state, \key + aesimc.8 \state, \state + .endm + + .macro enc_dround, key1, key2 + enc_round q0, \key1 + enc_round q0, \key2 + .endm + + .macro dec_dround, key1, key2 + dec_round q0, \key1 + dec_round q0, \key2 + .endm + + .macro enc_fround, key1, key2, key3 + enc_round q0, \key1 + aese.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro dec_fround, key1, key2, key3 + dec_round q0, \key1 + aesd.8 q0, \key2 + veor q0, q0, \key3 + .endm + + .macro enc_dround_3x, key1, key2 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + enc_round q0, \key2 + enc_round q1, \key2 + enc_round q2, \key2 + .endm + + .macro dec_dround_3x, key1, key2 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + dec_round q0, \key2 + dec_round q1, \key2 + dec_round q2, \key2 + .endm + + .macro enc_fround_3x, key1, key2, key3 + enc_round q0, \key1 + enc_round q1, \key1 + enc_round q2, \key1 + aese.8 q0, \key2 + aese.8 q1, \key2 + aese.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro dec_fround_3x, key1, key2, key3 + dec_round q0, \key1 + dec_round q1, \key1 + dec_round q2, \key1 + aesd.8 q0, \key2 + aesd.8 q1, \key2 + aesd.8 q2, \key2 + veor q0, q0, \key3 + veor q1, q1, \key3 + veor q2, q2, \key3 + .endm + + .macro do_block, dround, fround + cmp r3, #12 @ which key size? + vld1.8 {q10-q11}, [ip]! + \dround q8, q9 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + vld1.8 {q10-q11}, [ip]! + \dround q12, q13 + vld1.8 {q12-q13}, [ip]! + \dround q10, q11 + blo 0f @ AES-128: 10 rounds + vld1.8 {q10-q11}, [ip]! + beq 1f @ AES-192: 12 rounds + \dround q12, q13 + vld1.8 {q12-q13}, [ip] + \dround q10, q11 +0: \fround q12, q13, q14 + bx lr + +1: \dround q12, q13 + \fround q10, q11, q14 + bx lr + .endm + + /* + * Internal, non-AAPCS compliant functions that implement the core AES + * transforms. These should preserve all registers except q0 - q2 and ip + * Arguments: + * q0 : first in/output block + * q1 : second in/output block (_3x version only) + * q2 : third in/output block (_3x version only) + * q8 : first round key + * q9 : secound round key + * ip : address of 3rd round key + * q14 : final round key + * r3 : number of rounds + */ + .align 6 +aes_encrypt: + add ip, r2, #32 @ 3rd round key +.Laes_encrypt_tweak: + do_block enc_dround, enc_fround +ENDPROC(aes_encrypt) + + .align 6 +aes_decrypt: + add ip, r2, #32 @ 3rd round key + do_block dec_dround, dec_fround +ENDPROC(aes_decrypt) + + .align 6 +aes_encrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block enc_dround_3x, enc_fround_3x +ENDPROC(aes_encrypt_3x) + + .align 6 +aes_decrypt_3x: + add ip, r2, #32 @ 3rd round key + do_block dec_dround_3x, dec_fround_3x +ENDPROC(aes_decrypt_3x) + + .macro prepare_key, rk, rounds + add ip, \rk, \rounds, lsl #4 + vld1.8 {q8-q9}, [\rk] @ load first 2 round keys + vld1.8 {q14}, [ip] @ load last round key + .endm + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks) + */ +ENTRY(ce_aes_ecb_encrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbencloop3x: + subs r4, r4, #3 + bmi .Lecbenc1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_encrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbencloop3x +.Lecbenc1x: + adds r4, r4, #3 + beq .Lecbencout +.Lecbencloop: + vld1.8 {q0}, [r1, :64]! + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbencloop +.Lecbencout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_encrypt) + +ENTRY(ce_aes_ecb_decrypt) + push {r4, lr} + ldr r4, [sp, #8] + prepare_key r2, r3 +.Lecbdecloop3x: + subs r4, r4, #3 + bmi .Lecbdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + bl aes_decrypt_3x + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lecbdecloop3x +.Lecbdec1x: + adds r4, r4, #3 + beq .Lecbdecout +.Lecbdecloop: + vld1.8 {q0}, [r1, :64]! + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lecbdecloop +.Lecbdecout: + pop {r4, pc} +ENDPROC(ce_aes_ecb_decrypt) + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[]) + */ +ENTRY(ce_aes_cbc_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q0}, [r5] + prepare_key r2, r3 +.Lcbcencloop: + vld1.8 {q1}, [r1, :64]! @ get next pt block + veor q0, q0, q1 @ ..and xor with iv + bl aes_encrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcencloop + vst1.8 {q0}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_encrypt) + +ENTRY(ce_aes_cbc_decrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ keep iv in q6 + prepare_key r2, r3 +.Lcbcdecloop3x: + subs r4, r4, #3 + bmi .Lcbcdec1x + vld1.8 {q0-q1}, [r1, :64]! + vld1.8 {q2}, [r1, :64]! + vmov q3, q0 + vmov q4, q1 + vmov q5, q2 + bl aes_decrypt_3x + veor q0, q0, q6 + veor q1, q1, q3 + veor q2, q2, q4 + vmov q6, q5 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + b .Lcbcdecloop3x +.Lcbcdec1x: + adds r4, r4, #3 + beq .Lcbcdecout + vmov q15, q14 @ preserve last round key +.Lcbcdecloop: + vld1.8 {q0}, [r1, :64]! @ get next ct block + veor q14, q15, q6 @ combine prev ct with last key + vmov q6, q0 + bl aes_decrypt + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + vst1.8 {q6}, [r5] @ keep iv in q6 + pop {r4-r6, pc} +ENDPROC(ce_aes_cbc_decrypt) + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[]) + */ +ENTRY(ce_aes_ctr_encrypt) + push {r4-r6, lr} + ldrd r4, r5, [sp, #16] + vld1.8 {q6}, [r5] @ load ctr + prepare_key r2, r3 + vmov r6, s27 @ keep swabbed ctr in r6 + rev r6, r6 + cmn r6, r4 @ 32 bit overflow? + bcs .Lctrloop +.Lctrloop3x: + subs r4, r4, #3 + bmi .Lctr1x + add r6, r6, #1 + vmov q0, q6 + vmov q1, q6 + rev ip, r6 + add r6, r6, #1 + vmov q2, q6 + vmov s7, ip + rev ip, r6 + add r6, r6, #1 + vmov s11, ip + vld1.8 {q3-q4}, [r1, :64]! + vld1.8 {q5}, [r1, :64]! + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + rev ip, r6 + vst1.8 {q0-q1}, [r0, :64]! + vst1.8 {q2}, [r0, :64]! + vmov s27, ip + b .Lctrloop3x +.Lctr1x: + adds r4, r4, #3 + beq .Lctrout +.Lctrloop: + vmov q0, q6 + bl aes_encrypt + subs r4, r4, #1 + bmi .Lctrhalfblock @ blocks < 0 means 1/2 block + vld1.8 {q3}, [r1, :64]! + veor q3, q0, q3 + vst1.8 {q3}, [r0, :64]! + + adds r6, r6, #1 @ increment BE ctr + rev ip, r6 + vmov s27, ip + bcs .Lctrcarry + teq r4, #0 + bne .Lctrloop +.Lctrout: + vst1.8 {q6}, [r5] + pop {r4-r6, pc} + +.Lctrhalfblock: + vld1.8 {d1}, [r1, :64] + veor d0, d0, d1 + vst1.8 {d0}, [r0, :64] + pop {r4-r6, pc} + +.Lctrcarry: + .irp sreg, s26, s25, s24 + vmov ip, \sreg @ load next word of ctr + rev ip, ip @ ... to handle the carry + adds ip, ip, #1 + rev ip, ip + vmov \sreg, ip + bcc 0f + .endr +0: teq r4, #0 + beq .Lctrout + b .Lctrloop +ENDPROC(ce_aes_ctr_encrypt) + + /* + * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 iv[], u8 const rk2[], int first) + */ + + .macro next_tweak, out, in, const, tmp + vshr.s64 \tmp, \in, #63 + vand \tmp, \tmp, \const + vadd.u64 \out, \in, \in + vext.8 \tmp, \tmp, \tmp, #8 + veor \out, \out, \tmp + .endm + + .align 3 +.Lxts_mul_x: + .quad 1, 0x87 + +ce_aes_xts_init: + vldr d14, .Lxts_mul_x + vldr d15, .Lxts_mul_x + 8 + + ldrd r4, r5, [sp, #16] @ load args + ldr r6, [sp, #28] + vld1.8 {q0}, [r5] @ load iv + teq r6, #1 @ start of a block? + bxne lr + + @ Encrypt the IV in q0 with the second AES key. This should only + @ be done at the start of a block. + ldr r6, [sp, #24] @ load AES key 2 + prepare_key r6, r3 + add ip, r6, #32 @ 3rd round key of key 2 + b .Laes_encrypt_tweak @ tail call +ENDPROC(ce_aes_xts_init) + +ENTRY(ce_aes_xts_encrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsenc3x + +.Lxtsencloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsenc3x: + subs r4, r4, #3 + bmi .Lxtsenc1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_encrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsencout + b .Lxtsencloop3x +.Lxtsenc1x: + adds r4, r4, #3 + beq .Lxtsencout +.Lxtsencloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + bl aes_encrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsencout + next_tweak q3, q3, q7, q6 + b .Lxtsencloop +.Lxtsencout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_encrypt) + + +ENTRY(ce_aes_xts_decrypt) + push {r4-r6, lr} + + bl ce_aes_xts_init @ run shared prologue + prepare_key r2, r3 + vmov q3, q0 + + teq r6, #0 @ start of a block? + bne .Lxtsdec3x + +.Lxtsdecloop3x: + next_tweak q3, q3, q7, q6 +.Lxtsdec3x: + subs r4, r4, #3 + bmi .Lxtsdec1x + vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks + vld1.8 {q2}, [r1, :64]! + next_tweak q4, q3, q7, q6 + veor q0, q0, q3 + next_tweak q5, q4, q7, q6 + veor q1, q1, q4 + veor q2, q2, q5 + bl aes_decrypt_3x + veor q0, q0, q3 + veor q1, q1, q4 + veor q2, q2, q5 + vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks + vst1.8 {q2}, [r0, :64]! + vmov q3, q5 + teq r4, #0 + beq .Lxtsdecout + b .Lxtsdecloop3x +.Lxtsdec1x: + adds r4, r4, #3 + beq .Lxtsdecout +.Lxtsdecloop: + vld1.8 {q0}, [r1, :64]! + veor q0, q0, q3 + add ip, r2, #32 @ 3rd round key + bl aes_decrypt + veor q0, q0, q3 + vst1.8 {q0}, [r0, :64]! + subs r4, r4, #1 + beq .Lxtsdecout + next_tweak q3, q3, q7, q6 + b .Lxtsdecloop +.Lxtsdecout: + vst1.8 {q3}, [r5] + pop {r4-r6, pc} +ENDPROC(ce_aes_xts_decrypt) + + /* + * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the + * AES sbox substitution on each byte in + * 'input' + */ +ENTRY(ce_aes_sub) + vdup.32 q1, r0 + veor q0, q0, q0 + aese.8 q0, q1 + vmov r0, s0 + bx lr +ENDPROC(ce_aes_sub) + + /* + * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns + * operation on round key *src + */ +ENTRY(ce_aes_invert) + vld1.8 {q0}, [r1] + aesimc.8 q0, q0 + vst1.8 {q0}, [r0] + bx lr +ENDPROC(ce_aes_invert) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c new file mode 100644 index 000000000000..d2ee59157ec7 --- /dev/null +++ b/arch/arm/crypto/aes-ce-glue.c @@ -0,0 +1,520 @@ +/* + * aes-ce-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/hwcap.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-ce-core.S */ +asmlinkage u32 ce_aes_sub(u32 input); +asmlinkage void ce_aes_invert(void *dst, void *src); + +asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); +asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks); + +asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); +asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[]); + +asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[]); + +asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); +asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 iv[], + u8 const rk2[], int first); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len) +{ + /* + * The AES key schedule round constants + */ + static u8 const rcon[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, + }; + + u32 kwords = key_len / sizeof(u32); + struct aes_block *key_enc, *key_dec; + int i, j; + + if (key_len != AES_KEYSIZE_128 && + key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) + return -EINVAL; + + memcpy(ctx->key_enc, in_key, key_len); + ctx->key_length = key_len; + + kernel_neon_begin(); + for (i = 0; i < sizeof(rcon); i++) { + u32 *rki = ctx->key_enc + (i * kwords); + u32 *rko = rki + kwords; + + rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ rcon[i]; + rko[1] = rko[0] ^ rki[1]; + rko[2] = rko[1] ^ rki[2]; + rko[3] = rko[2] ^ rki[3]; + + if (key_len == AES_KEYSIZE_192) { + if (i >= 7) + break; + rko[4] = rko[3] ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + } else if (key_len == AES_KEYSIZE_256) { + if (i >= 6) + break; + rko[4] = ce_aes_sub(rko[3]) ^ rki[4]; + rko[5] = rko[4] ^ rki[5]; + rko[6] = rko[5] ^ rki[6]; + rko[7] = rko[6] ^ rki[7]; + } + } + + /* + * Generate the decryption keys for the Equivalent Inverse Cipher. + * This involves reversing the order of the round keys, and applying + * the Inverse Mix Columns transformation on all but the first and + * the last one. + */ + key_enc = (struct aes_block *)ctx->key_enc; + key_dec = (struct aes_block *)ctx->key_dec; + j = num_rounds(ctx); + + key_dec[0] = key_enc[j]; + for (i = 1, j--; j > 0; i++, j--) + ce_aes_invert(key_dec + i, key_enc + j); + key_dec[i] = key_enc[0]; + + kernel_neon_end(); + return 0; +} + +static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + unsigned int blocks; + int err; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, num_rounds(ctx), blocks, + walk.iv); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct blkcipher_walk walk; + int err, blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, num_rounds(ctx), blocks, + walk.iv); + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, + num_rounds(ctx), blocks, walk.iv); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = num_rounds(&ctx->key1); + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + walk.iv, (u8 *)ctx->key2.key_enc, first); + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-ce", + .cra_driver_name = "__driver-ecb-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-ce", + .cra_driver_name = "__driver-cbc-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-ce", + .cra_driver_name = "__driver-ctr-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-ce", + .cra_driver_name = "__driver-xts-aes-ce", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_AES)) + return -ENODEV; + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +module_init(aes_init); +module_exit(aes_exit); -- 1.8.3.2
next prev parent reply other threads:[~2015-03-10 8:48 UTC|newest] Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top 2015-03-10 8:47 [PATCH v2 0/5] ARM: crypto: ARMv8 Crypto Extensions Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel 2015-03-10 8:47 ` [PATCH v2 1/5] crypto/arm: move ARM specific Kconfig definitions to a dedicated file Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel 2015-03-10 8:47 ` [PATCH v2 2/5] crypto/arm: add support for SHA1 using ARMv8 Crypto Instructions Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel 2015-03-10 8:47 ` [PATCH v2 3/5] crypto/arm: add support for SHA-224/256 using ARMv8 Crypto Extensions Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel [this message] 2015-03-10 8:47 ` [PATCH v2 4/5] crypto/arm: AES in ECB/CBC/CTR/XTS modes " Ard Biesheuvel 2015-03-10 8:47 ` [PATCH v2 5/5] crypto/arm: add support for GHASH " Ard Biesheuvel 2015-03-10 8:47 ` Ard Biesheuvel 2015-03-12 10:19 ` [PATCH v2 0/5] ARM: crypto: " Herbert Xu 2015-03-12 10:19 ` Herbert Xu
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1425977268-27835-5-git-send-email-ard.biesheuvel@linaro.org \ --to=ard.biesheuvel@linaro.org \ --cc=herbert@gondor.apana.org.au \ --cc=jussi.kivilinna@iki.fi \ --cc=linux-arm-kernel@lists.infradead.org \ --cc=linux-crypto@vger.kernel.org \ --cc=linux@arm.linux.org.uk \ --cc=nico@linaro.org \ --cc=will.deacon@arm.com \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.