* [PATCH v2 01/17] crypto: arm/aes - fix round key prototypes
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
The AES round keys are arrays of u32s in native endianness now, so
update the function prototypes accordingly.
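To illustrate the shape of the change (condensed from the diff below): the asm
prototypes now take the round key array as u32 const rk[], matching the
key_enc/key_dec members of struct crypto_aes_ctx, so the glue code can drop
its (u8 *) casts at the call sites.

  asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
                                     int rounds, int blocks);

  /* call site: the key schedule is passed directly, no cast needed */
  ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                     ctx->key_enc, num_rounds(ctx), blocks);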
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 18 ++++-----
arch/arm/crypto/aes-ce-glue.c | 40 ++++++++++----------
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 425000232d49..1e0d45183590 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -154,9 +154,9 @@ ENDPROC(aes_decrypt_3x)
.endm
/*
- * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
- * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks)
*/
ENTRY(ce_aes_ecb_encrypt)
@@ -212,9 +212,9 @@ ENTRY(ce_aes_ecb_decrypt)
ENDPROC(ce_aes_ecb_decrypt)
/*
- * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
- * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 iv[])
*/
ENTRY(ce_aes_cbc_encrypt)
@@ -272,7 +272,7 @@ ENTRY(ce_aes_cbc_decrypt)
ENDPROC(ce_aes_cbc_decrypt)
/*
- * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 ctr[])
*/
ENTRY(ce_aes_ctr_encrypt)
@@ -349,10 +349,10 @@ ENTRY(ce_aes_ctr_encrypt)
ENDPROC(ce_aes_ctr_encrypt)
/*
- * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 iv[], u8 const rk2[], int first)
- * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 iv[], u8 const rk2[], int first)
+ * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
+ * int blocks, u8 iv[], u32 const rk2[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
+ * int blocks, u8 iv[], u32 const rk2[], int first)
*/
.macro next_tweak, out, in, const, tmp
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index a7265d0a7063..75d2ff03a63e 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -25,25 +25,25 @@ MODULE_LICENSE("GPL v2");
asmlinkage u32 ce_aes_sub(u32 input);
asmlinkage void ce_aes_invert(void *dst, void *src);
-asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
-asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
-asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
-asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int blocks, u8 iv[],
- u8 const rk2[], int first);
-asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ u32 const rk2[], int first);
+asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
int rounds, int blocks, u8 iv[],
- u8 const rk2[], int first);
+ u32 const rk2[], int first);
struct aes_block {
u8 b[AES_BLOCK_SIZE];
@@ -182,7 +182,7 @@ static int ecb_encrypt(struct skcipher_request *req)
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
+ ctx->key_enc, num_rounds(ctx), blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
@@ -202,7 +202,7 @@ static int ecb_decrypt(struct skcipher_request *req)
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
+ ctx->key_dec, num_rounds(ctx), blocks);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
@@ -222,7 +222,7 @@ static int cbc_encrypt(struct skcipher_request *req)
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+ ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -243,7 +243,7 @@ static int cbc_decrypt(struct skcipher_request *req)
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
+ ctx->key_dec, num_rounds(ctx), blocks,
walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -263,7 +263,7 @@ static int ctr_encrypt(struct skcipher_request *req)
kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+ ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
@@ -278,8 +278,8 @@ static int ctr_encrypt(struct skcipher_request *req)
*/
blocks = -1;
- ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
- num_rounds(ctx), blocks, walk.iv);
+ ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx),
+ blocks, walk.iv);
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
@@ -324,8 +324,8 @@ static int xts_encrypt(struct skcipher_request *req)
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_enc, rounds, blocks,
- walk.iv, (u8 *)ctx->key2.key_enc, first);
+ ctx->key1.key_enc, rounds, blocks, walk.iv,
+ ctx->key2.key_enc, first);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
@@ -346,8 +346,8 @@ static int xts_decrypt(struct skcipher_request *req)
kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- (u8 *)ctx->key1.key_dec, rounds, blocks,
- walk.iv, (u8 *)ctx->key2.key_enc, first);
+ ctx->key1.key_dec, rounds, blocks, walk.iv,
+ ctx->key2.key_enc, first);
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
--
2.17.1
* [PATCH v2 02/17] crypto: arm/aes-ce - yield the SIMD unit between scatterwalk steps
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Reduce the scope of the kernel_neon_begin/end regions so that the SIMD
unit is released (and thus preemption re-enabled) if the crypto operation
cannot be completed in a single scatterwalk step. This avoids scheduling
blackouts due to preemption being disabled for unbounded periods, resulting
in a more responsive system.
After this change, we can also permit the skcipher walk infrastructure to
sleep, so pass 'false' for the 'atomic' parameter of skcipher_walk_virt() as
well.
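For illustration, the resulting pattern in the ECB encrypt path (condensed
from the hunk below) claims and releases the NEON unit once per scatterwalk
step rather than around the whole walk:

  err = skcipher_walk_virt(&walk, req, false);  /* the walk may now sleep */

  while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
          kernel_neon_begin();
          ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                             ctx->key_enc, num_rounds(ctx), blocks);
          kernel_neon_end();
          err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
  }
  return err;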
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-glue.c | 47 ++++++++++----------
arch/arm/crypto/aes-neonbs-glue.c | 22 ++++-----
2 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 75d2ff03a63e..486e862ae34a 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -177,15 +177,15 @@ static int ecb_encrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, num_rounds(ctx), blocks);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -197,15 +197,15 @@ static int ecb_decrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, num_rounds(ctx), blocks);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -217,16 +217,16 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -238,16 +238,16 @@ static int cbc_decrypt(struct skcipher_request *req)
unsigned int blocks;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, num_rounds(ctx), blocks,
walk.iv);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -258,13 +258,14 @@ static int ctr_encrypt(struct skcipher_request *req)
struct skcipher_walk walk;
int err, blocks;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ kernel_neon_begin();
ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, num_rounds(ctx), blocks,
walk.iv);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
@@ -278,13 +279,13 @@ static int ctr_encrypt(struct skcipher_request *req)
*/
blocks = -1;
+ kernel_neon_begin();
ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx),
blocks, walk.iv);
+ kernel_neon_end();
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
- kernel_neon_end();
-
return err;
}
@@ -319,17 +320,16 @@ static int xts_encrypt(struct skcipher_request *req)
struct skcipher_walk walk;
unsigned int blocks;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ kernel_neon_begin();
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_enc, rounds, blocks, walk.iv,
ctx->key2.key_enc, first);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
-
return err;
}
@@ -341,17 +341,16 @@ static int xts_decrypt(struct skcipher_request *req)
struct skcipher_walk walk;
unsigned int blocks;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ kernel_neon_begin();
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_dec, rounds, blocks, walk.iv,
ctx->key2.key_enc, first);
+ kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
- kernel_neon_end();
-
return err;
}
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 45cd9818791e..9000d0796d5e 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -90,9 +90,8 @@ static int __ecb_crypt(struct skcipher_request *req,
struct skcipher_walk walk;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -100,12 +99,13 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
+ kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
ctx->rounds, blocks);
+ kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -159,9 +159,8 @@ static int cbc_decrypt(struct skcipher_request *req)
struct skcipher_walk walk;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -169,13 +168,14 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
+ kernel_neon_begin();
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key.rk, ctx->key.rounds, blocks,
walk.iv);
+ kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -223,9 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 buf[AES_BLOCK_SIZE];
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_neon_begin();
while (walk.nbytes > 0) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -236,8 +235,10 @@ static int ctr_encrypt(struct skcipher_request *req)
final = NULL;
}
+ kernel_neon_begin();
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->rk, ctx->rounds, blocks, walk.iv, final);
+ kernel_neon_end();
if (final) {
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -252,7 +253,6 @@ static int ctr_encrypt(struct skcipher_request *req)
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
@@ -329,7 +329,6 @@ static int __xts_crypt(struct skcipher_request *req,
crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
- kernel_neon_begin();
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -337,12 +336,13 @@ static int __xts_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
+ kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
ctx->key.rounds, blocks, walk.iv);
+ kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
- kernel_neon_end();
return err;
}
--
2.17.1
* [PATCH v2 03/17] crypto: arm/aes-ce - switch to 4x interleave
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
When the ARM AES instruction based crypto driver was introduced, there
were no known implementations that could benefit from a 4-way interleave,
and so a 3-way interleave was used instead. Since we have sufficient
space in the SIMD register file, let's switch to a 4-way interleave to
align with the 64-bit driver, and to ensure that we can reach optimum
performance when running under emulation on high-end 64-bit cores.
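Sketched in C-like pseudocode (illustration only; encrypt_four_blocks() and
encrypt_one_block() are hypothetical stand-ins for the NEON routines), the
loop structure after this change is:

  while (blocks >= 4) {                   /* main loop: was 3 blocks/iteration */
          encrypt_four_blocks(out, in, rk);
          in  += 4 * AES_BLOCK_SIZE;
          out += 4 * AES_BLOCK_SIZE;
          blocks -= 4;
  }
  while (blocks-- > 0) {                  /* single-block tail, as before */
          encrypt_one_block(out, in, rk);
          in  += AES_BLOCK_SIZE;
          out += AES_BLOCK_SIZE;
  }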
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 263 +++++++++++---------
1 file changed, 144 insertions(+), 119 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 1e0d45183590..a3ca4ac2d7bb 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -44,46 +44,56 @@
veor q0, q0, \key3
.endm
- .macro enc_dround_3x, key1, key2
+ .macro enc_dround_4x, key1, key2
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
+ enc_round q3, \key1
enc_round q0, \key2
enc_round q1, \key2
enc_round q2, \key2
+ enc_round q3, \key2
.endm
- .macro dec_dround_3x, key1, key2
+ .macro dec_dround_4x, key1, key2
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
+ dec_round q3, \key1
dec_round q0, \key2
dec_round q1, \key2
dec_round q2, \key2
+ dec_round q3, \key2
.endm
- .macro enc_fround_3x, key1, key2, key3
+ .macro enc_fround_4x, key1, key2, key3
enc_round q0, \key1
enc_round q1, \key1
enc_round q2, \key1
+ enc_round q3, \key1
aese.8 q0, \key2
aese.8 q1, \key2
aese.8 q2, \key2
+ aese.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
+ veor q3, q3, \key3
.endm
- .macro dec_fround_3x, key1, key2, key3
+ .macro dec_fround_4x, key1, key2, key3
dec_round q0, \key1
dec_round q1, \key1
dec_round q2, \key1
+ dec_round q3, \key1
aesd.8 q0, \key2
aesd.8 q1, \key2
aesd.8 q2, \key2
+ aesd.8 q3, \key2
veor q0, q0, \key3
veor q1, q1, \key3
veor q2, q2, \key3
+ veor q3, q3, \key3
.endm
.macro do_block, dround, fround
@@ -114,8 +124,9 @@
* transforms. These should preserve all registers except q0 - q2 and ip
* Arguments:
* q0 : first in/output block
- * q1 : second in/output block (_3x version only)
- * q2 : third in/output block (_3x version only)
+ * q1 : second in/output block (_4x version only)
+ * q2 : third in/output block (_4x version only)
+ * q3 : fourth in/output block (_4x version only)
* q8 : first round key
* q9 : secound round key
* q14 : final round key
@@ -136,16 +147,16 @@ aes_decrypt:
ENDPROC(aes_decrypt)
.align 6
-aes_encrypt_3x:
+aes_encrypt_4x:
add ip, r2, #32 @ 3rd round key
- do_block enc_dround_3x, enc_fround_3x
-ENDPROC(aes_encrypt_3x)
+ do_block enc_dround_4x, enc_fround_4x
+ENDPROC(aes_encrypt_4x)
.align 6
-aes_decrypt_3x:
+aes_decrypt_4x:
add ip, r2, #32 @ 3rd round key
- do_block dec_dround_3x, dec_fround_3x
-ENDPROC(aes_decrypt_3x)
+ do_block dec_dround_4x, dec_fround_4x
+ENDPROC(aes_decrypt_4x)
.macro prepare_key, rk, rounds
add ip, \rk, \rounds, lsl #4
@@ -163,17 +174,17 @@ ENTRY(ce_aes_ecb_encrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
-.Lecbencloop3x:
- subs r4, r4, #3
+.Lecbencloop4x:
+ subs r4, r4, #4
bmi .Lecbenc1x
vld1.8 {q0-q1}, [r1]!
- vld1.8 {q2}, [r1]!
- bl aes_encrypt_3x
+ vld1.8 {q2-q3}, [r1]!
+ bl aes_encrypt_4x
vst1.8 {q0-q1}, [r0]!
- vst1.8 {q2}, [r0]!
- b .Lecbencloop3x
+ vst1.8 {q2-q3}, [r0]!
+ b .Lecbencloop4x
.Lecbenc1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lecbencout
.Lecbencloop:
vld1.8 {q0}, [r1]!
@@ -189,17 +200,17 @@ ENTRY(ce_aes_ecb_decrypt)
push {r4, lr}
ldr r4, [sp, #8]
prepare_key r2, r3
-.Lecbdecloop3x:
- subs r4, r4, #3
+.Lecbdecloop4x:
+ subs r4, r4, #4
bmi .Lecbdec1x
vld1.8 {q0-q1}, [r1]!
- vld1.8 {q2}, [r1]!
- bl aes_decrypt_3x
+ vld1.8 {q2-q3}, [r1]!
+ bl aes_decrypt_4x
vst1.8 {q0-q1}, [r0]!
- vst1.8 {q2}, [r0]!
- b .Lecbdecloop3x
+ vst1.8 {q2-q3}, [r0]!
+ b .Lecbdecloop4x
.Lecbdec1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lecbdecout
.Lecbdecloop:
vld1.8 {q0}, [r1]!
@@ -236,38 +247,40 @@ ENDPROC(ce_aes_cbc_encrypt)
ENTRY(ce_aes_cbc_decrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
- vld1.8 {q6}, [r5] @ keep iv in q6
+ vld1.8 {q15}, [r5] @ keep iv in q15
prepare_key r2, r3
-.Lcbcdecloop3x:
- subs r4, r4, #3
+.Lcbcdecloop4x:
+ subs r4, r4, #4
bmi .Lcbcdec1x
vld1.8 {q0-q1}, [r1]!
- vld1.8 {q2}, [r1]!
- vmov q3, q0
- vmov q4, q1
- vmov q5, q2
- bl aes_decrypt_3x
- veor q0, q0, q6
- veor q1, q1, q3
- veor q2, q2, q4
- vmov q6, q5
+ vld1.8 {q2-q3}, [r1]!
+ vmov q4, q0
+ vmov q5, q1
+ vmov q6, q2
+ vmov q7, q3
+ bl aes_decrypt_4x
+ veor q0, q0, q15
+ veor q1, q1, q4
+ veor q2, q2, q5
+ veor q3, q3, q6
+ vmov q15, q7
vst1.8 {q0-q1}, [r0]!
- vst1.8 {q2}, [r0]!
- b .Lcbcdecloop3x
+ vst1.8 {q2-q3}, [r0]!
+ b .Lcbcdecloop4x
.Lcbcdec1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lcbcdecout
- vmov q15, q14 @ preserve last round key
+ vmov q6, q14 @ preserve last round key
.Lcbcdecloop:
vld1.8 {q0}, [r1]! @ get next ct block
veor q14, q15, q6 @ combine prev ct with last key
- vmov q6, q0
+ vmov q15, q0
bl aes_decrypt
vst1.8 {q0}, [r0]!
subs r4, r4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- vst1.8 {q6}, [r5] @ keep iv in q6
+ vst1.8 {q15}, [r5] @ keep iv in q15
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
@@ -278,46 +291,52 @@ ENDPROC(ce_aes_cbc_decrypt)
ENTRY(ce_aes_ctr_encrypt)
push {r4-r6, lr}
ldrd r4, r5, [sp, #16]
- vld1.8 {q6}, [r5] @ load ctr
+ vld1.8 {q7}, [r5] @ load ctr
prepare_key r2, r3
- vmov r6, s27 @ keep swabbed ctr in r6
+ vmov r6, s31 @ keep swabbed ctr in r6
rev r6, r6
cmn r6, r4 @ 32 bit overflow?
bcs .Lctrloop
-.Lctrloop3x:
- subs r4, r4, #3
+.Lctrloop4x:
+ subs r4, r4, #4
bmi .Lctr1x
add r6, r6, #1
- vmov q0, q6
- vmov q1, q6
+ vmov q0, q7
+ vmov q1, q7
rev ip, r6
add r6, r6, #1
- vmov q2, q6
+ vmov q2, q7
vmov s7, ip
rev ip, r6
add r6, r6, #1
+ vmov q3, q7
vmov s11, ip
- vld1.8 {q3-q4}, [r1]!
- vld1.8 {q5}, [r1]!
- bl aes_encrypt_3x
- veor q0, q0, q3
- veor q1, q1, q4
- veor q2, q2, q5
+ rev ip, r6
+ add r6, r6, #1
+ vmov s15, ip
+ vld1.8 {q4-q5}, [r1]!
+ vld1.8 {q6}, [r1]!
+ vld1.8 {q15}, [r1]!
+ bl aes_encrypt_4x
+ veor q0, q0, q4
+ veor q1, q1, q5
+ veor q2, q2, q6
+ veor q3, q3, q15
rev ip, r6
vst1.8 {q0-q1}, [r0]!
- vst1.8 {q2}, [r0]!
- vmov s27, ip
- b .Lctrloop3x
+ vst1.8 {q2-q3}, [r0]!
+ vmov s31, ip
+ b .Lctrloop4x
.Lctr1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lctrout
.Lctrloop:
- vmov q0, q6
+ vmov q0, q7
bl aes_encrypt
adds r6, r6, #1 @ increment BE ctr
rev ip, r6
- vmov s27, ip
+ vmov s31, ip
bcs .Lctrcarry
.Lctrcarrydone:
@@ -329,7 +348,7 @@ ENTRY(ce_aes_ctr_encrypt)
bne .Lctrloop
.Lctrout:
- vst1.8 {q6}, [r5] @ return next CTR value
+ vst1.8 {q7}, [r5] @ return next CTR value
pop {r4-r6, pc}
.Lctrtailblock:
@@ -337,7 +356,7 @@ ENTRY(ce_aes_ctr_encrypt)
b .Lctrout
.Lctrcarry:
- .irp sreg, s26, s25, s24
+ .irp sreg, s30, s29, s28
vmov ip, \sreg @ load next word of ctr
rev ip, ip @ ... to handle the carry
adds ip, ip, #1
@@ -368,8 +387,8 @@ ENDPROC(ce_aes_ctr_encrypt)
.quad 1, 0x87
ce_aes_xts_init:
- vldr d14, .Lxts_mul_x
- vldr d15, .Lxts_mul_x + 8
+ vldr d30, .Lxts_mul_x
+ vldr d31, .Lxts_mul_x + 8
ldrd r4, r5, [sp, #16] @ load args
ldr r6, [sp, #28]
@@ -390,48 +409,51 @@ ENTRY(ce_aes_xts_encrypt)
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
- vmov q3, q0
+ vmov q4, q0
teq r6, #0 @ start of a block?
- bne .Lxtsenc3x
+ bne .Lxtsenc4x
-.Lxtsencloop3x:
- next_tweak q3, q3, q7, q6
-.Lxtsenc3x:
- subs r4, r4, #3
+.Lxtsencloop4x:
+ next_tweak q4, q4, q15, q10
+.Lxtsenc4x:
+ subs r4, r4, #4
bmi .Lxtsenc1x
- vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
- vld1.8 {q2}, [r1]!
- next_tweak q4, q3, q7, q6
- veor q0, q0, q3
- next_tweak q5, q4, q7, q6
- veor q1, q1, q4
- veor q2, q2, q5
- bl aes_encrypt_3x
- veor q0, q0, q3
- veor q1, q1, q4
- veor q2, q2, q5
- vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
- vst1.8 {q2}, [r0]!
- vmov q3, q5
+ vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
+ vld1.8 {q2-q3}, [r1]!
+ next_tweak q5, q4, q15, q10
+ veor q0, q0, q4
+ next_tweak q6, q5, q15, q10
+ veor q1, q1, q5
+ next_tweak q7, q6, q15, q10
+ veor q2, q2, q6
+ veor q3, q3, q7
+ bl aes_encrypt_4x
+ veor q0, q0, q4
+ veor q1, q1, q5
+ veor q2, q2, q6
+ veor q3, q3, q7
+ vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks
+ vst1.8 {q2-q3}, [r0]!
+ vmov q4, q7
teq r4, #0
beq .Lxtsencout
- b .Lxtsencloop3x
+ b .Lxtsencloop4x
.Lxtsenc1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lxtsencout
.Lxtsencloop:
vld1.8 {q0}, [r1]!
- veor q0, q0, q3
+ veor q0, q0, q4
bl aes_encrypt
- veor q0, q0, q3
+ veor q0, q0, q4
vst1.8 {q0}, [r0]!
subs r4, r4, #1
beq .Lxtsencout
- next_tweak q3, q3, q7, q6
+ next_tweak q4, q4, q15, q6
b .Lxtsencloop
.Lxtsencout:
- vst1.8 {q3}, [r5]
+ vst1.8 {q4}, [r5]
pop {r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
@@ -441,49 +463,52 @@ ENTRY(ce_aes_xts_decrypt)
bl ce_aes_xts_init @ run shared prologue
prepare_key r2, r3
- vmov q3, q0
+ vmov q4, q0
teq r6, #0 @ start of a block?
- bne .Lxtsdec3x
+ bne .Lxtsdec4x
-.Lxtsdecloop3x:
- next_tweak q3, q3, q7, q6
-.Lxtsdec3x:
- subs r4, r4, #3
+.Lxtsdecloop4x:
+ next_tweak q4, q4, q15, q10
+.Lxtsdec4x:
+ subs r4, r4, #4
bmi .Lxtsdec1x
- vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
- vld1.8 {q2}, [r1]!
- next_tweak q4, q3, q7, q6
- veor q0, q0, q3
- next_tweak q5, q4, q7, q6
- veor q1, q1, q4
- veor q2, q2, q5
- bl aes_decrypt_3x
- veor q0, q0, q3
- veor q1, q1, q4
- veor q2, q2, q5
- vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
- vst1.8 {q2}, [r0]!
- vmov q3, q5
+ vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
+ vld1.8 {q2-q3}, [r1]!
+ next_tweak q5, q4, q15, q10
+ veor q0, q0, q4
+ next_tweak q6, q5, q15, q10
+ veor q1, q1, q5
+ next_tweak q7, q6, q15, q10
+ veor q2, q2, q6
+ veor q3, q3, q7
+ bl aes_decrypt_4x
+ veor q0, q0, q4
+ veor q1, q1, q5
+ veor q2, q2, q6
+ veor q3, q3, q7
+ vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks
+ vst1.8 {q2-q3}, [r0]!
+ vmov q4, q7
teq r4, #0
beq .Lxtsdecout
- b .Lxtsdecloop3x
+ b .Lxtsdecloop4x
.Lxtsdec1x:
- adds r4, r4, #3
+ adds r4, r4, #4
beq .Lxtsdecout
.Lxtsdecloop:
vld1.8 {q0}, [r1]!
- veor q0, q0, q3
+ veor q0, q0, q4
add ip, r2, #32 @ 3rd round key
bl aes_decrypt
- veor q0, q0, q3
+ veor q0, q0, q4
vst1.8 {q0}, [r0]!
subs r4, r4, #1
beq .Lxtsdecout
- next_tweak q3, q3, q7, q6
+ next_tweak q4, q4, q15, q6
b .Lxtsdecloop
.Lxtsdecout:
- vst1.8 {q3}, [r5]
+ vst1.8 {q4}, [r5]
pop {r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
--
2.17.1
* [PATCH v2 04/17] crypto: arm/aes-ce - replace tweak mask literal with composition
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Replace the vector load from memory sequence with a simple instruction
sequence to compose the tweak vector directly.
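For reference, what the new three-instruction sequence computes, written out
in C (q15 consists of d30/d31; the end result is the same { 1, 0x87 } pair
that used to live at .Lxts_mul_x):

  u32 d30[2] = { 0x87, 0x87 };          /* vmov.i32  d30, #0x87           */
  u64 q15[2] = { d30[0], d30[1] };      /* vmovl.u32 q15, d30 (widen)     */
  q15[0]     = q15[1] >> 7;             /* vshr.u64  d30, d31, #7  ->  1  */
  /* q15 now holds { 1, 0x87 }, the XTS tweak mask */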
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index a3ca4ac2d7bb..bb6ec1844370 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -382,13 +382,10 @@ ENDPROC(ce_aes_ctr_encrypt)
veor \out, \out, \tmp
.endm
- .align 3
-.Lxts_mul_x:
- .quad 1, 0x87
-
ce_aes_xts_init:
- vldr d30, .Lxts_mul_x
- vldr d31, .Lxts_mul_x + 8
+ vmov.i32 d30, #0x87 @ compose tweak mask vector
+ vmovl.u32 q15, d30
+ vshr.u64 d30, d31, #7
ldrd r4, r5, [sp, #16] @ load args
ldr r6, [sp, #28]
--
2.17.1
* [PATCH v2 05/17] crypto: arm/aes-neonbs - replace tweak mask literal with composition
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Replace the vector load from memory sequence with a simple instruction
sequence to compose the tweak vector directly.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-neonbs-core.S | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index d3eab76b6e1b..bb75918e4984 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -887,10 +887,6 @@ ENDPROC(aesbs_ctr_encrypt)
veor \out, \out, \tmp
.endm
- .align 4
-.Lxts_mul_x:
- .quad 1, 0x87
-
/*
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
@@ -899,7 +895,9 @@ ENDPROC(aesbs_ctr_encrypt)
*/
__xts_prepare8:
vld1.8 {q14}, [r7] // load iv
- __ldr q15, .Lxts_mul_x // load tweak mask
+ vmov.i32 d30, #0x87 // compose tweak mask vector
+ vmovl.u32 q15, d30
+ vshr.u64 d30, d31, #7
vmov q12, q14
__adr ip, 0f
--
2.17.1
* [PATCH v2 06/17] crypto: arm64/aes-neonbs - replace tweak mask literal with composition
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Replace the vector load from memory sequence with a simple instruction
sequence to compose the tweak vector directly.
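As on 32-bit ARM, the composed value equals the old constant; a sketch in C of
what the arm64 sequence computes (assuming the usual zeroing of the upper half
of the destination by the 64-bit movi forms):

  u32 v30[4] = { 1, 1, 0, 0 };          /* movi v30.2s, #0x1  */
  u32 v25[4] = { 0x87, 0x87, 0, 0 };    /* movi v25.2s, #0x87 */
  /* uzp1 v30.4s, v30.4s, v25.4s: take the even-numbered lanes of both */
  u32 res[4] = { v30[0], v30[2], v25[0], v25[2] };  /* { 1, 0, 0x87, 0 } */
  /* viewed as two 64-bit lanes this is { 1, 0x87 }, i.e. .Lxts_mul_x (LE) */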
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-neonbs-core.S | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index cf10ff8878a3..65982039fa36 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -730,11 +730,6 @@ ENDPROC(aesbs_cbc_decrypt)
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
- .align 4
-.Lxts_mul_x:
-CPU_LE( .quad 1, 0x87 )
-CPU_BE( .quad 0x87, 1 )
-
/*
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
@@ -806,7 +801,9 @@ ENDPROC(__xts_crypt8)
mov x23, x4
mov x24, x5
-0: ldr q30, .Lxts_mul_x
+0: movi v30.2s, #0x1
+ movi v25.2s, #0x87
+ uzp1 v30.4s, v30.4s, v25.4s
ld1 {v25.16b}, [x24]
99: adr x7, \do8
--
2.17.1
* [PATCH v2 07/17] crypto: arm64/aes-neon - limit exposed routines if faster driver is enabled
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
The pure NEON AES implementation predates the bit-slicing one, and is
generally slower, unless the algorithm in question can only execute
sequentially.
So advertising the skciphers that the bit-slicing driver implements as
well serves no real purpose, and we can just disable them. Note that the
bit-slicing driver also has a link time dependency on the pure NEON
driver, for CBC encryption and for XTS tweak calculation, so we still
need both drivers on systems that do not implement the Crypto Extensions.
At the same time, expose those modaliases for the AES instruction based
driver. This is necessary since otherwise, we may end up loading the
wrong driver when any of the skciphers are instantiated before the CPU
capability based module loading has completed.
Finally, add the missing modalias for cts(cbc(aes)) so requests for
this algorithm will autoload the correct module.
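Condensed from the diff, the resulting layout of the aliases in aes-glue.c is
roughly:

  #if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
  MODULE_ALIAS_CRYPTO("ecb(aes)");
  MODULE_ALIAS_CRYPTO("cbc(aes)");
  MODULE_ALIAS_CRYPTO("ctr(aes)");
  MODULE_ALIAS_CRYPTO("xts(aes)");
  #endif
  /* always exposed: the bit-slicing driver does not implement these */
  MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
  MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
  MODULE_ALIAS_CRYPTO("cmac(aes)");
  MODULE_ALIAS_CRYPTO("xcbc(aes)");
  MODULE_ALIAS_CRYPTO("cbcmac(aes)");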
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-glue.c | 112 +++++++++++---------
1 file changed, 59 insertions(+), 53 deletions(-)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index ca0c84d56cba..4154bb93a85b 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -54,15 +54,18 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_xts_decrypt neon_aes_xts_decrypt
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+#endif
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
-MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
+#endif
+MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
+MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
MODULE_ALIAS_CRYPTO("cmac(aes)");
MODULE_ALIAS_CRYPTO("xcbc(aes)");
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
-#endif
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@@ -144,8 +147,8 @@ static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return ret;
}
-static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
{
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
@@ -165,8 +168,9 @@ static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
return -EINVAL;
}
-static int essiv_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
- unsigned int key_len)
+static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
+ const u8 *in_key,
+ unsigned int key_len)
{
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
SHASH_DESC_ON_STACK(desc, ctx->hash);
@@ -190,7 +194,7 @@ static int essiv_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
return -EINVAL;
}
-static int ecb_encrypt(struct skcipher_request *req)
+static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -210,7 +214,7 @@ static int ecb_encrypt(struct skcipher_request *req)
return err;
}
-static int ecb_decrypt(struct skcipher_request *req)
+static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -248,7 +252,7 @@ static int cbc_encrypt_walk(struct skcipher_request *req,
return err;
}
-static int cbc_encrypt(struct skcipher_request *req)
+static int __maybe_unused cbc_encrypt(struct skcipher_request *req)
{
struct skcipher_walk walk;
int err;
@@ -277,7 +281,7 @@ static int cbc_decrypt_walk(struct skcipher_request *req,
return err;
}
-static int cbc_decrypt(struct skcipher_request *req)
+static int __maybe_unused cbc_decrypt(struct skcipher_request *req)
{
struct skcipher_walk walk;
int err;
@@ -404,7 +408,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
return skcipher_walk_done(&walk, 0);
}
-static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
+static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
{
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -415,14 +419,14 @@ static int essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
return 0;
}
-static void essiv_cbc_exit_tfm(struct crypto_skcipher *tfm)
+static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm)
{
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
crypto_free_shash(ctx->hash);
}
-static int essiv_cbc_encrypt(struct skcipher_request *req)
+static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -444,7 +448,7 @@ static int essiv_cbc_encrypt(struct skcipher_request *req)
return err ?: cbc_encrypt_walk(req, &walk);
}
-static int essiv_cbc_decrypt(struct skcipher_request *req)
+static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -520,7 +524,7 @@ static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
local_irq_restore(flags);
}
-static int ctr_encrypt_sync(struct skcipher_request *req)
+static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req)
{
if (!crypto_simd_usable())
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
@@ -528,7 +532,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req)
return ctr_encrypt(req);
}
-static int xts_encrypt(struct skcipher_request *req)
+static int __maybe_unused xts_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -550,7 +554,7 @@ static int xts_encrypt(struct skcipher_request *req)
return err;
}
-static int xts_decrypt(struct skcipher_request *req)
+static int __maybe_unused xts_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -573,6 +577,7 @@ static int xts_decrypt(struct skcipher_request *req)
}
static struct skcipher_alg aes_algs[] = { {
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
.base = {
.cra_name = "__ecb(aes)",
.cra_driver_name = "__ecb-aes-" MODE,
@@ -603,42 +608,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = skcipher_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
-}, {
- .base = {
- .cra_name = "__cts(cbc(aes))",
- .cra_driver_name = "__cts-cbc-aes-" MODE,
- .cra_priority = PRIO,
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_module = THIS_MODULE,
- },
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .walksize = 2 * AES_BLOCK_SIZE,
- .setkey = skcipher_aes_setkey,
- .encrypt = cts_cbc_encrypt,
- .decrypt = cts_cbc_decrypt,
- .init = cts_cbc_init_tfm,
-}, {
- .base = {
- .cra_name = "__essiv(cbc(aes),sha256)",
- .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE,
- .cra_priority = PRIO + 1,
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx),
- .cra_module = THIS_MODULE,
- },
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = essiv_cbc_set_key,
- .encrypt = essiv_cbc_encrypt,
- .decrypt = essiv_cbc_decrypt,
- .init = essiv_cbc_init_tfm,
- .exit = essiv_cbc_exit_tfm,
}, {
.base = {
.cra_name = "__ctr(aes)",
@@ -688,6 +657,43 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
+}, {
+#endif
+ .base = {
+ .cra_name = "__cts(cbc(aes))",
+ .cra_driver_name = "__cts-cbc-aes-" MODE,
+ .cra_priority = PRIO,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
+ .setkey = skcipher_aes_setkey,
+ .encrypt = cts_cbc_encrypt,
+ .decrypt = cts_cbc_decrypt,
+ .init = cts_cbc_init_tfm,
+}, {
+ .base = {
+ .cra_name = "__essiv(cbc(aes),sha256)",
+ .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE,
+ .cra_priority = PRIO + 1,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx),
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = essiv_cbc_set_key,
+ .encrypt = essiv_cbc_encrypt,
+ .decrypt = essiv_cbc_decrypt,
+ .init = essiv_cbc_init_tfm,
+ .exit = essiv_cbc_exit_tfm,
} };
static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
--
2.17.1
* [PATCH v2 08/17] crypto: skcipher - add the ability to abort a skcipher walk
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
After starting a skcipher walk, the only way to ensure that all
resources it has tied up are released is to complete it. In some
cases, it will be useful to be able to abort a walk cleanly after
it has started, so add this ability to the skcipher walk API.
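A usage sketch (the condition name is made up for illustration; patch 11 in
this series uses the helper in exactly this way to restart an XTS walk):

  err = skcipher_walk_virt(&walk, req, false);

  if (need_to_redo_the_walk) {            /* hypothetical condition */
          skcipher_walk_abort(&walk);     /* skcipher_walk_done(&walk, -ECANCELED) */
          /* ... set up a smaller subrequest and walk that instead ... */
  }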
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
include/crypto/internal/skcipher.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index d68faa5759ad..734b6f7081b8 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -148,6 +148,11 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
struct aead_request *req, bool atomic);
void skcipher_walk_complete(struct skcipher_walk *walk, int err);
+static inline void skcipher_walk_abort(struct skcipher_walk *walk)
+{
+ skcipher_walk_done(walk, -ECANCELED);
+}
+
static inline void ablkcipher_request_complete(struct ablkcipher_request *req,
int err)
{
--
2.17.1
* [PATCH v2 09/17] crypto: arm64/aes-cts-cbc-ce - performance tweak
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Optimize away one of the tbl instructions in the decryption path,
which turns out to be unnecessary.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-modes.S | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 2879f030a749..38cd5a2091a8 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -293,12 +293,11 @@ AES_ENTRY(aes_cbc_cts_decrypt)
ld1 {v5.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
- tbl v2.16b, {v1.16b}, v4.16b
decrypt_block v0, w3, x2, x6, w7
- eor v2.16b, v2.16b, v0.16b
+ tbl v2.16b, {v0.16b}, v3.16b
+ eor v2.16b, v2.16b, v1.16b
tbx v0.16b, {v1.16b}, v4.16b
- tbl v2.16b, {v2.16b}, v3.16b
decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v5.16b /* xor with iv */
--
2.17.1
* [PATCH v2 10/17] crypto: arm64/aes-cts-cbc - move request context data to the stack
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Since the CTS-CBC code completes synchronously, there is no point in
keeping part of the scratch data it uses in the request context, so
move it to the stack instead.
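Condensed from the diff, the on-stack pattern that replaces the request
context is simply:

  struct scatterlist sg_src[2], sg_dst[2];        /* formerly in cts_cbc_req_ctx */
  struct skcipher_request subreq;

  skcipher_request_set_tfm(&subreq, tfm);
  skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
                                NULL, NULL);
  /* crypto_skcipher_set_reqsize() and the .init hook are no longer needed */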
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-glue.c | 61 +++++++++-----------
1 file changed, 26 insertions(+), 35 deletions(-)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 4154bb93a85b..5ee980c5a5c2 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -107,12 +107,6 @@ asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
int blocks, u8 dg[], int enc_before,
int enc_after);
-struct cts_cbc_req_ctx {
- struct scatterlist sg_src[2];
- struct scatterlist sg_dst[2];
- struct skcipher_request subreq;
-};
-
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
@@ -292,23 +286,20 @@ static int __maybe_unused cbc_decrypt(struct skcipher_request *req)
return cbc_decrypt_walk(req, &walk);
}
-static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
-{
- crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
- return 0;
-}
-
static int cts_cbc_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
int err, rounds = 6 + ctx->key_length / 4;
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
struct skcipher_walk walk;
- skcipher_request_set_tfm(&rctx->subreq, tfm);
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
if (req->cryptlen <= AES_BLOCK_SIZE) {
if (req->cryptlen < AES_BLOCK_SIZE)
@@ -317,31 +308,30 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
}
if (cbc_blocks > 0) {
- skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
cbc_blocks * AES_BLOCK_SIZE,
req->iv);
- err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?:
- cbc_encrypt_walk(&rctx->subreq, &walk);
+ err = skcipher_walk_virt(&walk, &subreq, false) ?:
+ cbc_encrypt_walk(&subreq, &walk);
if (err)
return err;
if (req->cryptlen == AES_BLOCK_SIZE)
return 0;
- dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
- rctx->subreq.cryptlen);
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
if (req->dst != req->src)
- dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
- rctx->subreq.cryptlen);
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
}
/* handle ciphertext stealing */
- skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ skcipher_request_set_crypt(&subreq, src, dst,
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
req->iv);
- err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
@@ -357,13 +347,16 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
int err, rounds = 6 + ctx->key_length / 4;
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
struct skcipher_walk walk;
- skcipher_request_set_tfm(&rctx->subreq, tfm);
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
if (req->cryptlen <= AES_BLOCK_SIZE) {
if (req->cryptlen < AES_BLOCK_SIZE)
@@ -372,31 +365,30 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
}
if (cbc_blocks > 0) {
- skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
cbc_blocks * AES_BLOCK_SIZE,
req->iv);
- err = skcipher_walk_virt(&walk, &rctx->subreq, false) ?:
- cbc_decrypt_walk(&rctx->subreq, &walk);
+ err = skcipher_walk_virt(&walk, &subreq, false) ?:
+ cbc_decrypt_walk(&subreq, &walk);
if (err)
return err;
if (req->cryptlen == AES_BLOCK_SIZE)
return 0;
- dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
- rctx->subreq.cryptlen);
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
if (req->dst != req->src)
- dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
- rctx->subreq.cryptlen);
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
}
/* handle ciphertext stealing */
- skcipher_request_set_crypt(&rctx->subreq, src, dst,
+ skcipher_request_set_crypt(&subreq, src, dst,
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
req->iv);
- err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+ err = skcipher_walk_virt(&walk, &subreq, false);
if (err)
return err;
@@ -675,7 +667,6 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = skcipher_aes_setkey,
.encrypt = cts_cbc_encrypt,
.decrypt = cts_cbc_decrypt,
- .init = cts_cbc_init_tfm,
}, {
.base = {
.cra_name = "__essiv(cbc(aes),sha256)",
--
2.17.1
* [PATCH v2 11/17] crypto: arm64/aes - implement support for XTS ciphertext stealing
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Add the missing support for ciphertext stealing in the implementation
of AES-XTS, which is part of the XTS specification but was omitted up
until now due to lack of a need for it.
The asm helpers are updated so they can deal with any input size, as
long as the last full block and the final partial block are presented
at the same time. The glue code is updated so that the common case of
operating on a sector or page is mostly as before. When CTS is needed,
the walk is split up into two pieces, unless the entire input is covered
by a single step.
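Condensed sketch of the new control flow in the glue code (taken from the
xts_encrypt() hunk below; subrequest tfm/callback setup omitted for brevity):

  int tail = req->cryptlen % AES_BLOCK_SIZE;

  err = skcipher_walk_virt(&walk, req, false);

  if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
          /* CTS needed and one step does not cover the request: abort and
           * redo the walk as a subrequest that stops two blocks short */
          int xts_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;

          skcipher_walk_abort(&walk);
          skcipher_request_set_crypt(&subreq, req->src, req->dst,
                                     xts_blocks * AES_BLOCK_SIZE, req->iv);
          req = &subreq;
          err = skcipher_walk_virt(&walk, req, false);
  } else {
          tail = 0;                       /* a single step covers everything */
  }

  /* ... process the full blocks as usual; if tail != 0, finish by handing
   * the last full block plus the partial tail (AES_BLOCK_SIZE + tail bytes)
   * to the asm helper in one final call ... */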
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-glue.c | 126 ++++++++++++++++++--
arch/arm64/crypto/aes-modes.S | 99 ++++++++++++---
2 files changed, 195 insertions(+), 30 deletions(-)
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5ee980c5a5c2..eecb74fd2f61 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -90,10 +90,10 @@ asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u32 const rk2[], u8 iv[],
+ int rounds, int bytes, u32 const rk2[], u8 iv[],
int first);
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u32 const rk2[], u8 iv[],
+ int rounds, int bytes, u32 const rk2[], u8 iv[],
int first);
asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
@@ -529,21 +529,71 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks,
+ ctx->key1.key_enc, rounds, nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static int __maybe_unused xts_decrypt(struct skcipher_request *req)
@@ -551,21 +601,72 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = 6 + ctx->key1.key_length / 4;
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks,
+ ctx->key1.key_dec, rounds, nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+
+ kernel_neon_begin();
+ aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes,
+ ctx->key2.key_enc, walk.iv, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static struct skcipher_alg aes_algs[] = { {
@@ -646,6 +747,7 @@ static struct skcipher_alg aes_algs[] = { {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 38cd5a2091a8..f2c2ba739f36 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -413,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt)
/*
- * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int bytes, u8 const rk2[], u8 iv[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 const rk2[], u8 iv[], int first)
+ * int bytes, u8 const rk2[], u8 iv[], int first)
*/
.macro next_tweak, out, in, tmp
@@ -451,7 +451,7 @@ AES_ENTRY(aes_xts_encrypt)
.LxtsencloopNx:
next_tweak v4, v4, v8
.LxtsencNx:
- subs w4, w4, #4
+ subs w4, w4, #64
bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
next_tweak v5, v4, v8
@@ -468,33 +468,66 @@ AES_ENTRY(aes_xts_encrypt)
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w4, .Lxtsencout
+ cbz w4, .Lxtsencret
xts_reload_mask v8
b .LxtsencloopNx
.Lxtsenc1x:
- adds w4, w4, #4
+ adds w4, w4, #64
beq .Lxtsencout
+ subs w4, w4, #16
+ bmi .LxtsencctsNx
.Lxtsencloop:
- ld1 {v1.16b}, [x1], #16
- eor v0.16b, v1.16b, v4.16b
+ ld1 {v0.16b}, [x1], #16
+.Lxtsencctsout:
+ eor v0.16b, v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
- beq .Lxtsencout
+ cbz w4, .Lxtsencout
+ subs w4, w4, #16
next_tweak v4, v4, v8
+ bmi .Lxtsenccts
+ st1 {v0.16b}, [x0], #16
b .Lxtsencloop
.Lxtsencout:
+ st1 {v0.16b}, [x0]
+.Lxtsencret:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret
-AES_ENDPROC(aes_xts_encrypt)
+.LxtsencctsNx:
+ mov v0.16b, v3.16b
+ sub x0, x0, #16
+.Lxtsenccts:
+ adr_l x8, .Lcts_permute_table
+
+ add x1, x1, w4, sxtw /* rewind input pointer */
+ add w4, w4, #16 /* # bytes in final block */
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ add x4, x0, x4 /* output address of final block */
+
+ ld1 {v1.16b}, [x1] /* load final block */
+ ld1 {v2.16b}, [x8]
+ ld1 {v3.16b}, [x9]
+
+ tbl v2.16b, {v0.16b}, v2.16b
+ tbx v0.16b, {v1.16b}, v3.16b
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ mov w4, wzr
+ b .Lxtsencctsout
+AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
+ /* subtract 16 bytes if we are doing CTS */
+ sub w8, w4, #0x10
+ tst w4, #0xf
+ csel w4, w4, w8, eq
+
ld1 {v4.16b}, [x6]
xts_load_mask v8
cbz w7, .Lxtsdecnotfirst
@@ -509,7 +542,7 @@ AES_ENTRY(aes_xts_decrypt)
.LxtsdecloopNx:
next_tweak v4, v4, v8
.LxtsdecNx:
- subs w4, w4, #4
+ subs w4, w4, #64
bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
next_tweak v5, v4, v8
@@ -530,22 +563,52 @@ AES_ENTRY(aes_xts_decrypt)
xts_reload_mask v8
b .LxtsdecloopNx
.Lxtsdec1x:
- adds w4, w4, #4
+ adds w4, w4, #64
beq .Lxtsdecout
+ subs w4, w4, #16
.Lxtsdecloop:
- ld1 {v1.16b}, [x1], #16
- eor v0.16b, v1.16b, v4.16b
+ ld1 {v0.16b}, [x1], #16
+ bmi .Lxtsdeccts
+.Lxtsdecctsout:
+ eor v0.16b, v0.16b, v4.16b
decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
- beq .Lxtsdecout
+ cbz w4, .Lxtsdecout
+ subs w4, w4, #16
next_tweak v4, v4, v8
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret
+
+.Lxtsdeccts:
+ adr_l x8, .Lcts_permute_table
+
+ add x1, x1, w4, sxtw /* rewind input pointer */
+ add w4, w4, #16 /* # bytes in final block */
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ add x4, x0, x4 /* output address of final block */
+
+ next_tweak v5, v4, v8
+
+ ld1 {v1.16b}, [x1] /* load final block */
+ ld1 {v2.16b}, [x8]
+ ld1 {v3.16b}, [x9]
+
+ eor v0.16b, v0.16b, v5.16b
+ decrypt_block v0, w3, x2, x8, w7
+ eor v0.16b, v0.16b, v5.16b
+
+ tbl v2.16b, {v0.16b}, v2.16b
+ tbx v0.16b, {v1.16b}, v3.16b
+
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ mov w4, wzr
+ b .Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)
/*
--
2.17.1
* [PATCH v2 12/17] crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (10 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 11/17] crypto: arm64/aes - implement support for XTS ciphertext stealing Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 13/17] crypto: arm/aes-ce " Ard Biesheuvel
` (5 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Update the AES-XTS implementation based on NEON instructions so that it
can deal with inputs whose size is not a multiple of the cipher block
size. This is part of the original XTS specification, but was never
implemented before in the Linux kernel.
Since the bit slicing driver is only faster if it can operate on at
least 7 blocks of input at the same time, let's reuse the alternate
path we are adding for CTS to process any data tail whose size is
not a multiple of 128 bytes.
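For reference, the length bookkeeping this adds to the glue code can be
sketched in plain C as below. This is an illustration only: xts_split() is
a made-up helper, and the real code operates on an skcipher_request and
its walk rather than on raw lengths.

    #include <stdio.h>

    #define AES_BLOCK_SIZE 16

    /* made-up helper mirroring the arithmetic in __xts_crypt() */
    static void xts_split(unsigned int len, unsigned int *bulk,
                          unsigned int *cts)
    {
        unsigned int tail = len % (8 * AES_BLOCK_SIZE);

        if (tail > 0 && tail < AES_BLOCK_SIZE) {
            /* trim the bulk pass so the CTS tail is covered by one step */
            unsigned int xts_blocks = (len + AES_BLOCK_SIZE - 1) /
                                      AES_BLOCK_SIZE - 2;

            *bulk = xts_blocks * AES_BLOCK_SIZE;
            *cts = len - *bulk;         /* one full block plus the tail */
        } else {
            *bulk = len;                /* any partial block is absorbed
                                           by the final walk step */
            *cts = 0;
        }
    }

    int main(void)
    {
        unsigned int bulk, cts;

        xts_split(388, &bulk, &cts);           /* 24 blocks + 4 byte tail */
        printf("bulk=%u cts=%u\n", bulk, cts); /* prints bulk=368 cts=20  */
        return 0;
    }

Either way, any tail that is not a multiple of 8 blocks ends up being passed
to the plain NEON routines, which now handle the ciphertext stealing as well.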
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-ce.S | 3 +
arch/arm64/crypto/aes-glue.c | 2 +
arch/arm64/crypto/aes-modes.S | 3 +
arch/arm64/crypto/aes-neon.S | 5 +
arch/arm64/crypto/aes-neonbs-glue.c | 111 +++++++++++++++++---
5 files changed, 110 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 00bd2885feaa..c132c49c89a8 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -21,6 +21,9 @@
.macro xts_reload_mask, tmp
.endm
+ .macro xts_cts_skip_tw, reg, lbl
+ .endm
+
/* preload all round keys */
.macro load_round_keys, rounds, rk
cmp \rounds, #12
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index eecb74fd2f61..327ac8d1489e 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -1073,5 +1073,7 @@ module_cpu_feature_match(AES, aes_init);
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
+EXPORT_SYMBOL(neon_aes_xts_encrypt);
+EXPORT_SYMBOL(neon_aes_xts_decrypt);
#endif
module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index f2c2ba739f36..131618389f1f 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -442,6 +442,7 @@ AES_ENTRY(aes_xts_encrypt)
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
+ xts_cts_skip_tw w7, .LxtsencNx
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
b .LxtsencNx
@@ -530,10 +531,12 @@ AES_ENTRY(aes_xts_decrypt)
ld1 {v4.16b}, [x6]
xts_load_mask v8
+ xts_cts_skip_tw w7, .Lxtsdecskiptw
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
+.Lxtsdecskiptw:
dec_prepare w3, x2, x8
b .LxtsdecNx
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 0cac5df6c901..22d9b110cf78 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -19,6 +19,11 @@
xts_load_mask \tmp
.endm
+ /* special case for the neon-bs driver calling into this one for CTS */
+ .macro xts_cts_skip_tw, reg, lbl
+ tbnz \reg, #1, \lbl
+ .endm
+
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index bafd2ebef8f1..ea873b8904c4 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -11,6 +11,7 @@
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/module.h>
@@ -45,6 +46,12 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks);
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
+asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
+ u32 const rk1[], int rounds, int bytes,
+ u32 const rk2[], u8 iv[], int first);
+asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[],
+ u32 const rk1[], int rounds, int bytes,
+ u32 const rk2[], u8 iv[], int first);
struct aesbs_ctx {
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
@@ -64,6 +71,7 @@ struct aesbs_ctr_ctx {
struct aesbs_xts_ctx {
struct aesbs_ctx key;
u32 twkey[AES_MAX_KEYLENGTH_U32];
+ struct crypto_aes_ctx cts;
};
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
@@ -270,6 +278,10 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err;
key_len /= 2;
+ err = aes_expandkey(&ctx->cts, in_key, key_len);
+ if (err)
+ return err;
+
err = aes_expandkey(&rk, in_key + key_len, key_len);
if (err)
return err;
@@ -302,48 +314,119 @@ static int ctr_encrypt_sync(struct skcipher_request *req)
return ctr_encrypt(req);
}
-static int __xts_crypt(struct skcipher_request *req,
+static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 iv[]))
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % (8 * AES_BLOCK_SIZE);
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- int err;
+ int nbytes, err;
+ int first = 1;
+ u8 *out, *in;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ /* ensure that the cts tail is covered by a single step */
+ if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ } else {
+ tail = 0;
+ }
err = skcipher_walk_virt(&walk, req, false);
if (err)
return err;
- kernel_neon_begin();
- neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
- kernel_neon_end();
-
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
- if (walk.nbytes < walk.total)
+ if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
+ out = walk.dst.virt.addr;
+ in = walk.src.virt.addr;
+ nbytes = walk.nbytes;
+
kernel_neon_begin();
- fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
- ctx->key.rounds, blocks, walk.iv);
+ if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
+ if (first)
+ neon_aes_ecb_encrypt(walk.iv, walk.iv,
+ ctx->twkey,
+ ctx->key.rounds, 1);
+ first = 0;
+
+ fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
+ walk.iv);
+
+ out += blocks * AES_BLOCK_SIZE;
+ in += blocks * AES_BLOCK_SIZE;
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ }
+
+ if (walk.nbytes == walk.total && nbytes > 0)
+ goto xts_tail;
+
kernel_neon_end();
- err = skcipher_walk_done(&walk,
- walk.nbytes - blocks * AES_BLOCK_SIZE);
+ skcipher_walk_done(&walk, nbytes);
}
- return err;
+
+ if (err || likely(!tail))
+ return err;
+
+ /* handle ciphertext stealing */
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ out = walk.dst.virt.addr;
+ in = walk.src.virt.addr;
+ nbytes = walk.nbytes;
+
+ kernel_neon_begin();
+xts_tail:
+ if (encrypt)
+ neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
+ nbytes, ctx->twkey, walk.iv, first ?: 2);
+ else
+ neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
+ nbytes, ctx->twkey, walk.iv, first ?: 2);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static int xts_encrypt(struct skcipher_request *req)
{
- return __xts_crypt(req, aesbs_xts_encrypt);
+ return __xts_crypt(req, true, aesbs_xts_encrypt);
}
static int xts_decrypt(struct skcipher_request *req)
{
- return __xts_crypt(req, aesbs_xts_decrypt);
+ return __xts_crypt(req, false, aesbs_xts_decrypt);
}
static struct skcipher_alg aes_algs[] = { {
--
2.17.1
* [PATCH v2 13/17] crypto: arm/aes-ce - implement ciphertext stealing for XTS
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (11 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 12/17] crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 14/17] crypto: arm/aes-neonbs " Ard Biesheuvel
` (4 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Update the AES-XTS implementation based on AES instructions so that it
can deal with inputs whose size is not a multiple of the cipher block
size. This is part of the original XTS specification, but was never
implemented before in the Linux kernel.
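For reference, the ciphertext stealing step itself (as specified in IEEE
P1619) can be sketched as below. This is an illustration only: xts_block()
is a toy stand-in for a single-block XTS encryption under the given tweak,
and the code only demonstrates how the final 16 + tail bytes are permuted.

    #include <stdio.h>
    #include <string.h>

    #define BLK 16

    /* toy stand-in for AES-ECB(key1, in ^ T) ^ T; only the data movement
     * in the CTS helper below matters */
    static void xts_block(unsigned char out[BLK], const unsigned char in[BLK],
                          unsigned char twk)
    {
        int i;

        for (i = 0; i < BLK; i++)
            out[i] = (unsigned char)(in[i] ^ twk ^ 0xa5);
    }

    /* encrypt the final 16 + tail bytes with ciphertext stealing */
    static void xts_cts_encrypt_tail(unsigned char *out,
                                     const unsigned char *in, int tail,
                                     unsigned char twk1, unsigned char twk2)
    {
        unsigned char cc[BLK], last[BLK];

        xts_block(cc, in, twk1);            /* encrypt last full block     */

        memcpy(last, in + BLK, tail);       /* short final plaintext ...   */
        memcpy(last + tail, cc + tail, BLK - tail); /* ... plus stolen bytes */

        memcpy(out + BLK, cc, tail);        /* truncated block, stored last */
        xts_block(out, last, twk2);         /* stolen block, stored first   */
    }

    int main(void)
    {
        unsigned char in[BLK + 5], out[BLK + 5];
        int i;

        for (i = 0; i < BLK + 5; i++)
            in[i] = (unsigned char)i;
        xts_cts_encrypt_tail(out, in, 5, 0x01, 0x02);
        for (i = 0; i < BLK + 5; i++)
            printf("%02x", out[i]);
        printf("\n");
        return 0;
    }

Decryption follows the same pattern with the order of the last two tweaks
swapped, which is why the decrypt entry point below subtracts 16 from the
byte count up front when the input is not block aligned.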
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 103 ++++++++++++++--
arch/arm/crypto/aes-ce-glue.c | 128 ++++++++++++++++++--
2 files changed, 208 insertions(+), 23 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index bb6ec1844370..763e51604ab6 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -369,9 +369,9 @@ ENDPROC(ce_aes_ctr_encrypt)
/*
* aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
- * int blocks, u8 iv[], u32 const rk2[], int first)
+ * int bytes, u8 iv[], u32 const rk2[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
- * int blocks, u8 iv[], u32 const rk2[], int first)
+ * int bytes, u8 iv[], u32 const rk2[], int first)
*/
.macro next_tweak, out, in, const, tmp
@@ -414,7 +414,7 @@ ENTRY(ce_aes_xts_encrypt)
.Lxtsencloop4x:
next_tweak q4, q4, q15, q10
.Lxtsenc4x:
- subs r4, r4, #4
+ subs r4, r4, #64
bmi .Lxtsenc1x
vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
vld1.8 {q2-q3}, [r1]!
@@ -434,24 +434,58 @@ ENTRY(ce_aes_xts_encrypt)
vst1.8 {q2-q3}, [r0]!
vmov q4, q7
teq r4, #0
- beq .Lxtsencout
+ beq .Lxtsencret
b .Lxtsencloop4x
.Lxtsenc1x:
- adds r4, r4, #4
+ adds r4, r4, #64
beq .Lxtsencout
+ subs r4, r4, #16
+ bmi .LxtsencctsNx
.Lxtsencloop:
vld1.8 {q0}, [r1]!
+.Lxtsencctsout:
veor q0, q0, q4
bl aes_encrypt
veor q0, q0, q4
- vst1.8 {q0}, [r0]!
- subs r4, r4, #1
+ teq r4, #0
beq .Lxtsencout
+ subs r4, r4, #16
next_tweak q4, q4, q15, q6
+ bmi .Lxtsenccts
+ vst1.8 {q0}, [r0]!
b .Lxtsencloop
.Lxtsencout:
+ vst1.8 {q0}, [r0]
+.Lxtsencret:
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
+
+.LxtsencctsNx:
+ vmov q0, q3
+ sub r0, r0, #16
+.Lxtsenccts:
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+
+ add r1, r1, r4 @ rewind input pointer
+ add r4, r4, #16 @ # bytes in final block
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ add r4, r0, r4 @ output address of final block
+
+ vld1.8 {q1}, [r1] @ load final partial block
+ vld1.8 {q2}, [ip]
+ vld1.8 {q3}, [lr]
+
+ vtbl.8 d4, {d0-d1}, d4
+ vtbl.8 d5, {d0-d1}, d5
+ vtbx.8 d0, {d2-d3}, d6
+ vtbx.8 d1, {d2-d3}, d7
+
+ vst1.8 {q2}, [r4] @ overlapping stores
+ mov r4, #0
+ b .Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)
@@ -462,13 +496,17 @@ ENTRY(ce_aes_xts_decrypt)
prepare_key r2, r3
vmov q4, q0
+ /* subtract 16 bytes if we are doing CTS */
+ tst r4, #0xf
+ subne r4, r4, #0x10
+
teq r6, #0 @ start of a block?
bne .Lxtsdec4x
.Lxtsdecloop4x:
next_tweak q4, q4, q15, q10
.Lxtsdec4x:
- subs r4, r4, #4
+ subs r4, r4, #64
bmi .Lxtsdec1x
vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
vld1.8 {q2-q3}, [r1]!
@@ -491,22 +529,55 @@ ENTRY(ce_aes_xts_decrypt)
beq .Lxtsdecout
b .Lxtsdecloop4x
.Lxtsdec1x:
- adds r4, r4, #4
+ adds r4, r4, #64
beq .Lxtsdecout
+ subs r4, r4, #16
.Lxtsdecloop:
vld1.8 {q0}, [r1]!
+ bmi .Lxtsdeccts
+.Lxtsdecctsout:
veor q0, q0, q4
- add ip, r2, #32 @ 3rd round key
bl aes_decrypt
veor q0, q0, q4
vst1.8 {q0}, [r0]!
- subs r4, r4, #1
+ teq r4, #0
beq .Lxtsdecout
+ subs r4, r4, #16
next_tweak q4, q4, q15, q6
b .Lxtsdecloop
.Lxtsdecout:
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
+
+.Lxtsdeccts:
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+
+ add r1, r1, r4 @ rewind input pointer
+ add r4, r4, #16 @ # bytes in final block
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ add r4, r0, r4 @ output address of final block
+
+ next_tweak q5, q4, q15, q6
+
+ vld1.8 {q1}, [r1] @ load final partial block
+ vld1.8 {q2}, [ip]
+ vld1.8 {q3}, [lr]
+
+ veor q0, q0, q5
+ bl aes_decrypt
+ veor q0, q0, q5
+
+ vtbl.8 d4, {d0-d1}, d4
+ vtbl.8 d5, {d0-d1}, d5
+ vtbx.8 d0, {d2-d3}, d6
+ vtbx.8 d1, {d2-d3}, d7
+
+ vst1.8 {q2}, [r4] @ overlapping stores
+ mov r4, #0
+ b .Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)
/*
@@ -532,3 +603,13 @@ ENTRY(ce_aes_invert)
vst1.32 {q0}, [r0]
bx lr
ENDPROC(ce_aes_invert)
+
+ .section ".rodata", "a"
+ .align 6
+.Lcts_permute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 486e862ae34a..c215792a2494 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -13,6 +13,7 @@
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -39,10 +40,10 @@ asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u8 iv[],
+ int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u8 iv[],
+ int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
struct aes_block {
@@ -317,20 +318,71 @@ static int xts_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks, walk.iv,
+ ctx->key1.key_enc, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes, walk.iv,
+ ctx->key2.key_enc, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static int xts_decrypt(struct skcipher_request *req)
@@ -338,20 +390,71 @@ static int xts_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks, walk.iv,
+ ctx->key1.key_dec, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes, walk.iv,
+ ctx->key2.key_enc, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static struct skcipher_alg aes_algs[] = { {
@@ -426,6 +529,7 @@ static struct skcipher_alg aes_algs[] = { {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
--
2.17.1
* [PATCH v2 14/17] crypto: arm/aes-neonbs - implement ciphertext stealing for XTS
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (12 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 13/17] crypto: arm/aes-ce " Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 15/17] crypto: arm/aes-ce - implement ciphertext stealing for CBC Ard Biesheuvel
` (3 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Update the AES-XTS implementation based on NEON instructions so that it
can deal with inputs whose size is not a multiple of the cipher block
size. This is part of the original XTS specification, but was never
implemented before in the Linux kernel.
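Unlike the other XTS implementations updated in this series, which steal
the final bytes in the assembly, this driver handles the tail in the glue
code, using a plain AES cipher on a two block stack buffer. The data
movement can be sketched as below (illustration only: flat buffers stand in
for the scatterlist accesses done via scatterwalk_map_and_copy(), and
aes_one() for crypto_cipher_encrypt_one()/_decrypt_one() on the new
cts_tfm):

    #include <string.h>

    #define AES_BLOCK_SIZE 16

    static void xts_steal_tail(unsigned char *dst, const unsigned char *src,
                               unsigned int len, unsigned int tail,
                               const unsigned char *tweak,
                               void (*aes_one)(unsigned char *,
                                               const unsigned char *))
    {
        unsigned char buf[2 * AES_BLOCK_SIZE];
        unsigned int i;

        /* last full output block produced by the bulk pass */
        memcpy(buf, dst + len - tail - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
        /* its leading bytes become the truncated final block */
        memcpy(buf + AES_BLOCK_SIZE, buf, tail);
        /* overlay the partial final block of the input */
        memcpy(buf, src + len - tail, tail);

        for (i = 0; i < AES_BLOCK_SIZE; i++)    /* XTS pre-whitening    */
            buf[i] ^= tweak[i];
        aes_one(buf, buf);                      /* one scalar AES block */
        for (i = 0; i < AES_BLOCK_SIZE; i++)    /* XTS post-whitening   */
            buf[i] ^= tweak[i];

        /* write back the recomputed block and the stolen tail */
        memcpy(dst + len - tail - AES_BLOCK_SIZE, buf, AES_BLOCK_SIZE + tail);
    }

The 'tweak' here is whatever the bulk pass left behind in the IV buffer;
for decryption, the reorder_last_tweak argument added to the assembly makes
sure the tweaks come out in the order that ciphertext stealing expects.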
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-neonbs-core.S | 16 +++--
arch/arm/crypto/aes-neonbs-glue.c | 69 +++++++++++++++++---
2 files changed, 72 insertions(+), 13 deletions(-)
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index bb75918e4984..cfaed4e67535 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -889,9 +889,9 @@ ENDPROC(aesbs_ctr_encrypt)
/*
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[])
+ * int blocks, u8 iv[], int reorder_last_tweak)
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
- * int blocks, u8 iv[])
+ * int blocks, u8 iv[], int reorder_last_tweak)
*/
__xts_prepare8:
vld1.8 {q14}, [r7] // load iv
@@ -944,17 +944,25 @@ __xts_prepare8:
vld1.8 {q7}, [r1]!
next_tweak q14, q12, q15, q13
- veor q7, q7, q12
+THUMB( itt le )
+ W(cmple) r8, #0
+ ble 1f
+0: veor q7, q7, q12
vst1.8 {q12}, [r4, :128]
-0: vst1.8 {q14}, [r7] // store next iv
+ vst1.8 {q14}, [r7] // store next iv
bx lr
+
+1: vswp q12, q14
+ b 0b
ENDPROC(__xts_prepare8)
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
push {r4-r8, lr}
mov r5, sp // preserve sp
ldrd r6, r7, [sp, #24] // get blocks and iv args
+ ldr r8, [sp, #32] // reorder final tweak?
+ rsb r8, r8, #1
sub ip, sp, #128 // make room for 8x tweak
bic ip, ip, #0xf // align sp to 16 bytes
mov sp, ip
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 9000d0796d5e..e85839a8aaeb 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -12,6 +12,7 @@
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <crypto/xts.h>
#include <linux/module.h>
@@ -37,9 +38,9 @@ asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
int rounds, int blocks, u8 ctr[], u8 final[]);
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
- int rounds, int blocks, u8 iv[]);
+ int rounds, int blocks, u8 iv[], int);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
- int rounds, int blocks, u8 iv[]);
+ int rounds, int blocks, u8 iv[], int);
struct aesbs_ctx {
int rounds;
@@ -53,6 +54,7 @@ struct aesbs_cbc_ctx {
struct aesbs_xts_ctx {
struct aesbs_ctx key;
+ struct crypto_cipher *cts_tfm;
struct crypto_cipher *tweak_tfm;
};
@@ -291,6 +293,9 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
return err;
key_len /= 2;
+ err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len);
+ if (err)
+ return err;
err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len);
if (err)
return err;
@@ -302,7 +307,13 @@ static int xts_init(struct crypto_tfm *tfm)
{
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(ctx->cts_tfm))
+ return PTR_ERR(ctx->cts_tfm);
+
ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(ctx->tweak_tfm))
+ crypto_free_cipher(ctx->cts_tfm);
return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
}
@@ -312,17 +323,34 @@ static void xts_exit(struct crypto_tfm *tfm)
struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
crypto_free_cipher(ctx->tweak_tfm);
+ crypto_free_cipher(ctx->cts_tfm);
}
-static int __xts_crypt(struct skcipher_request *req,
+static int __xts_crypt(struct skcipher_request *req, bool encrypt,
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
- int rounds, int blocks, u8 iv[]))
+ int rounds, int blocks, u8 iv[], int))
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 buf[2 * AES_BLOCK_SIZE];
struct skcipher_walk walk;
int err;
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (unlikely(tail)) {
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail, req->iv);
+ req = &subreq;
+ }
+
err = skcipher_walk_virt(&walk, req, true);
if (err)
return err;
@@ -331,30 +359,53 @@ static int __xts_crypt(struct skcipher_request *req,
while (walk.nbytes >= AES_BLOCK_SIZE) {
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ int reorder_last_tweak = !encrypt && tail > 0;
- if (walk.nbytes < walk.total)
+ if (walk.nbytes < walk.total) {
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
+ reorder_last_tweak = 0;
+ }
kernel_neon_begin();
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
- ctx->key.rounds, blocks, walk.iv);
+ ctx->key.rounds, blocks, walk.iv, reorder_last_tweak);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
- return err;
+ if (err || likely(!tail))
+ return err;
+
+ /* handle ciphertext stealing */
+ scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE, 0);
+ memcpy(buf + AES_BLOCK_SIZE, buf, tail);
+ scatterwalk_map_and_copy(buf, req->src, req->cryptlen, tail, 0);
+
+ crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
+
+ if (encrypt)
+ crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf);
+ else
+ crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf);
+
+ crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
+
+ scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE + tail, 1);
+ return 0;
}
static int xts_encrypt(struct skcipher_request *req)
{
- return __xts_crypt(req, aesbs_xts_encrypt);
+ return __xts_crypt(req, true, aesbs_xts_encrypt);
}
static int xts_decrypt(struct skcipher_request *req)
{
- return __xts_crypt(req, aesbs_xts_decrypt);
+ return __xts_crypt(req, false, aesbs_xts_decrypt);
}
static struct skcipher_alg aes_algs[] = { {
--
2.17.1
* [PATCH v2 15/17] crypto: arm/aes-ce - implement ciphertext stealing for CBC
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (13 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 14/17] crypto: arm/aes-neonbs " Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 16/17] crypto: testmgr - add test vectors for XTS ciphertext stealing Ard Biesheuvel
` (2 subsequent siblings)
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Instead of relying on the CTS template to wrap the accelerated CBC
skcipher, implement the ciphertext stealing part directly.
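The scheme implemented for the final two blocks is the usual CBC ciphertext
stealing (with the last two blocks swapped, matching what the cts template
produces) and can be sketched as below. Illustration only: enc() stands in
for a single AES block encryption, and 'prev' is the preceding ciphertext
block, or the IV if there is none.

    #include <string.h>

    #define AES_BLOCK_SIZE 16

    /* encrypt the final 16 + tail bytes of a message with CBC-CTS */
    static void cbc_cts_encrypt_tail(unsigned char *out,
                                     const unsigned char *in,
                                     unsigned int tail,
                                     const unsigned char prev[AES_BLOCK_SIZE],
                                     void (*enc)(unsigned char *,
                                                 const unsigned char *))
    {
        unsigned char cc[AES_BLOCK_SIZE], last[AES_BLOCK_SIZE];
        unsigned int i;

        for (i = 0; i < AES_BLOCK_SIZE; i++)    /* CBC chaining            */
            cc[i] = in[i] ^ prev[i];
        enc(cc, cc);                            /* encrypt last full block */

        memcpy(last, cc, AES_BLOCK_SIZE);
        for (i = 0; i < tail; i++)              /* fold in the short block */
            last[i] ^= in[AES_BLOCK_SIZE + i];

        memcpy(out + AES_BLOCK_SIZE, cc, tail); /* stolen bytes, stored last */
        enc(out, last);                         /* full block, stored first  */
    }

The decrypt side inverts this, which is what the overlapping loads/stores
and the permute table lookups in ce_aes_cbc_cts_decrypt() below take care
of.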
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 85 +++++++++
arch/arm/crypto/aes-ce-glue.c | 188 ++++++++++++++++++--
2 files changed, 256 insertions(+), 17 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 763e51604ab6..b978cdf133af 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -284,6 +284,91 @@ ENTRY(ce_aes_cbc_decrypt)
pop {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
+
+ /*
+ * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ * int rounds, int bytes, u8 const iv[])
+ */
+
+ENTRY(ce_aes_cbc_cts_encrypt)
+ push {r4-r6, lr}
+ ldrd r4, r5, [sp, #16]
+
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+ sub r4, r4, #16
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ vld1.8 {q5}, [ip]
+ vld1.8 {q6}, [lr]
+
+ add ip, r1, r4
+ vld1.8 {q0}, [r1] @ overlapping loads
+ vld1.8 {q3}, [ip]
+
+ vld1.8 {q1}, [r5] @ get iv
+ prepare_key r2, r3
+
+ veor q0, q0, q1 @ xor with iv
+ bl aes_encrypt
+
+ vtbl.8 d4, {d0-d1}, d10
+ vtbl.8 d5, {d0-d1}, d11
+ vtbl.8 d2, {d6-d7}, d12
+ vtbl.8 d3, {d6-d7}, d13
+
+ veor q0, q0, q1
+ bl aes_encrypt
+
+ add r4, r0, r4
+ vst1.8 {q2}, [r4] @ overlapping stores
+ vst1.8 {q0}, [r0]
+
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_cbc_cts_encrypt)
+
+ENTRY(ce_aes_cbc_cts_decrypt)
+ push {r4-r6, lr}
+ ldrd r4, r5, [sp, #16]
+
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+ sub r4, r4, #16
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ vld1.8 {q5}, [ip]
+ vld1.8 {q6}, [lr]
+
+ add ip, r1, r4
+ vld1.8 {q0}, [r1] @ overlapping loads
+ vld1.8 {q1}, [ip]
+
+ vld1.8 {q3}, [r5] @ get iv
+ prepare_key r2, r3
+
+ bl aes_decrypt
+
+ vtbl.8 d4, {d0-d1}, d10
+ vtbl.8 d5, {d0-d1}, d11
+ vtbx.8 d0, {d2-d3}, d12
+ vtbx.8 d1, {d2-d3}, d13
+
+ veor q1, q1, q2
+ bl aes_decrypt
+ veor q0, q0, q3 @ xor with iv
+
+ add r4, r0, r4
+ vst1.8 {q1}, [r4] @ overlapping stores
+ vst1.8 {q0}, [r0]
+
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_cbc_cts_decrypt)
+
+
/*
* aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
* int blocks, u8 ctr[])
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index c215792a2494..cdb1a07e7ad0 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -35,6 +35,10 @@ asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 iv[]);
+asmlinkage void ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
+asmlinkage void ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+ int rounds, int bytes, u8 const iv[]);
asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
@@ -210,48 +214,182 @@ static int ecb_decrypt(struct skcipher_request *req)
return err;
}
-static int cbc_encrypt(struct skcipher_request *req)
+static int cbc_encrypt_walk(struct skcipher_request *req,
+ struct skcipher_walk *walk)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
unsigned int blocks;
- int err;
+ int err = 0;
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
- ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ce_aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_enc, num_rounds(ctx), blocks,
- walk.iv);
+ walk->iv);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
}
-static int cbc_decrypt(struct skcipher_request *req)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
struct skcipher_walk walk;
- unsigned int blocks;
int err;
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+ return cbc_encrypt_walk(req, &walk);
+}
- while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+static int cbc_decrypt_walk(struct skcipher_request *req,
+ struct skcipher_walk *walk)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ unsigned int blocks;
+ int err = 0;
+
+ while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
- ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ce_aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_dec, num_rounds(ctx), blocks,
- walk.iv);
+ walk->iv);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
}
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+ return cbc_decrypt_walk(req, &walk);
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct skcipher_walk walk;
+ int err;
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false) ?:
+ cbc_encrypt_walk(&subreq, &walk);
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, num_rounds(ctx), walk.nbytes,
+ walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct skcipher_walk walk;
+ int err;
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false) ?:
+ cbc_decrypt_walk(&subreq, &walk);
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, num_rounds(ctx), walk.nbytes,
+ walk.iv);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
static int ctr_encrypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -486,6 +624,22 @@ static struct skcipher_alg aes_algs[] = { {
.setkey = ce_aes_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
+}, {
+ .base.cra_name = "__cts(cbc(aes))",
+ .base.cra_driver_name = "__cts-cbc-aes-ce",
+ .base.cra_priority = 300,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
+ .setkey = ce_aes_setkey,
+ .encrypt = cts_cbc_encrypt,
+ .decrypt = cts_cbc_decrypt,
}, {
.base.cra_name = "__ctr(aes)",
.base.cra_driver_name = "__ctr-aes-ce",
--
2.17.1
* [PATCH v2 16/17] crypto: testmgr - add test vectors for XTS ciphertext stealing
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (14 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 15/17] crypto: arm/aes-ce - implement ciphertext stealing for CBC Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 17/17] crypto: testmgr - Add additional AES-XTS vectors for covering CTS Ard Biesheuvel
2019-09-09 7:52 ` [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Herbert Xu
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Ard Biesheuvel
Import the AES-XTS test vectors from IEEE publication P1619/D16
that exercise the ciphertext stealing part of the XTS algorithm,
which we haven't supported in the Linux kernel implementation up
till now.
Tested-by: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
crypto/testmgr.h | 60 ++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index ef7d21f39d4a..86f2db6ae571 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -15291,6 +15291,66 @@ static const struct cipher_testvec aes_xts_tv_template[] = {
"\xc4\xf3\x6f\xfd\xa9\xfc\xea\x70"
"\xb9\xc6\xe6\x93\xe1\x48\xc1\x51",
.len = 512,
+ }, { /* XTS-AES 15 */
+ .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+ "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+ "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8"
+ "\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
+ .klen = 32,
+ .iv = "\x9a\x78\x56\x34\x12\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10",
+ .ctext = "\x6c\x16\x25\xdb\x46\x71\x52\x2d"
+ "\x3d\x75\x99\x60\x1d\xe7\xca\x09"
+ "\xed",
+ .len = 17,
+ }, { /* XTS-AES 16 */
+ .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+ "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+ "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8"
+ "\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
+ .klen = 32,
+ .iv = "\x9a\x78\x56\x34\x12\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11",
+ .ctext = "\xd0\x69\x44\x4b\x7a\x7e\x0c\xab"
+ "\x09\xe2\x44\x47\xd2\x4d\xeb\x1f"
+ "\xed\xbf",
+ .len = 18,
+ }, { /* XTS-AES 17 */
+ .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+ "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+ "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8"
+ "\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
+ .klen = 32,
+ .iv = "\x9a\x78\x56\x34\x12\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12",
+ .ctext = "\xe5\xdf\x13\x51\xc0\x54\x4b\xa1"
+ "\x35\x0b\x33\x63\xcd\x8e\xf4\xbe"
+ "\xed\xbf\x9d",
+ .len = 19,
+ }, { /* XTS-AES 18 */
+ .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+ "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+ "\xbf\xbe\xbd\xbc\xbb\xba\xb9\xb8"
+ "\xb7\xb6\xb5\xb4\xb3\xb2\xb1\xb0",
+ .klen = 32,
+ .iv = "\x9a\x78\x56\x34\x12\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13",
+ .ctext = "\x9d\x84\xc8\x13\xf7\x19\xaa\x2c"
+ "\x7b\xe3\xf6\x61\x71\xc7\xc5\xc2"
+ "\xed\xbf\x9d\xac",
+ .len = 20,
}
};
--
2.17.1
* [PATCH v2 17/17] crypto: testmgr - Add additional AES-XTS vectors for covering CTS
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (15 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 16/17] crypto: testmgr - add test vectors for XTS ciphertext stealing Ard Biesheuvel
@ 2019-09-03 16:43 ` Ard Biesheuvel
2019-09-09 7:52 ` [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Herbert Xu
17 siblings, 0 replies; 19+ messages in thread
From: Ard Biesheuvel @ 2019-09-03 16:43 UTC (permalink / raw)
To: linux-crypto; +Cc: herbert, ebiggers, Pascal van Leeuwen, Ard Biesheuvel
From: Pascal van Leeuwen <pvanleeuwen@verimatrix.com>
This patch adds test vectors for AES-XTS that cover data inputs that are
not a multiple of 16 bytes and therefore require ciphertext stealing
(CTS) to be applied. Vectors were added to cover all possible alignments
combined with various lengths that are interesting for vector
implementations working on 3, 4, 5 or 8 AES blocks in parallel.
This code was kindly donated to the public domain by the author.
Link: https://lore.kernel.org/linux-crypto/MN2PR20MB29739591E1A3E54E7A8A8E18CAC00@MN2PR20MB2973.namprd20.prod.outlook.com/
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
crypto/testmgr.h | 308 ++++++++++++++++++++
1 file changed, 308 insertions(+)
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 86f2db6ae571..b3296441f9c7 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -15351,6 +15351,314 @@ static const struct cipher_testvec aes_xts_tv_template[] = {
"\x7b\xe3\xf6\x61\x71\xc7\xc5\xc2"
"\xed\xbf\x9d\xac",
.len = 20,
+ /* Additional vectors to increase CTS coverage */
+ }, { /* 1 block + 21 bytes */
+ .key = "\xa1\x34\x0e\x49\x38\xfd\x8b\xf6"
+ "\x45\x60\x67\x07\x0f\x50\xa8\x2b"
+ "\xa8\xf1\xfe\x7e\xf4\xf0\x47\xcd"
+ "\xfd\x91\x78\xf9\x14\x8b\x7d\x27"
+ "\x0e\xdc\xca\xe6\xf4\xfc\xd7\x4f"
+ "\x19\x8c\xd0\xe6\x9e\x2f\xf8\x75"
+ "\xb5\xe2\x48\x00\x4f\x07\xd9\xa1"
+ "\x42\xbc\x9d\xfc\x17\x98\x00\x48",
+ .klen = 64,
+ .iv = "\xcb\x35\x47\x5a\x7a\x06\x28\xb9"
+ "\x80\xf5\xa7\xe6\x8a\x23\x42\xf8",
+ .ptext = "\x04\x52\xc8\x7f\xb0\x5a\x12\xc5"
+ "\x96\x47\x6b\xf4\xbc\x2e\xdb\x74"
+ "\xd2\x20\x24\x32\xe5\x84\xb6\x25"
+ "\x4c\x2f\x96\xc7\x55\x9c\x90\x6f"
+ "\x0e\x96\x94\x68\xf4",
+ .ctext = "\x6a\x2d\x57\xb8\x72\x49\x10\x6b"
+ "\x5b\x5a\xc9\x92\xab\x59\x79\x36"
+ "\x7a\x01\x95\xf7\xdd\xcb\x3f\xbf"
+ "\xb2\xe3\x7e\x35\xe3\x11\x04\x68"
+ "\x28\xc3\x70\x6a\xe1",
+ .len = 37,
+ }, { /* 3 blocks + 22 bytes */
+ .key = "\xf7\x87\x75\xdf\x36\x20\xe7\xcb"
+ "\x20\x5d\x49\x96\x81\x3d\x1d\x80"
+ "\xc7\x18\x7e\xbf\x2a\x0f\x79\xba"
+ "\x06\xb5\x4b\x63\x03\xfb\xb8\x49"
+ "\x93\x2d\x85\x5b\x95\x1f\x78\xea"
+ "\x7c\x1e\xf5\x5d\x02\xc6\xec\xb0"
+ "\xf0\xaa\x3d\x0a\x04\xe1\x67\x80"
+ "\x2a\xbe\x4e\x73\xc9\x11\xcc\x6c",
+ .klen = 64,
+ .iv = "\xeb\xba\x55\x24\xfc\x8f\x25\x7c"
+ "\x66\xf9\x04\x03\xcc\xb1\xf4\x84",
+ .ptext = "\x40\x75\x1b\x72\x2a\xc8\xbf\xef"
+ "\x0c\x92\x3e\x19\xc5\x09\x07\x38"
+ "\x4d\x87\x5c\xb8\xd6\x4f\x1a\x39"
+ "\x8c\xee\xa5\x22\x41\x12\xe1\x22"
+ "\xb5\x4b\xd7\xeb\x02\xfa\xaa\xf8"
+ "\x94\x47\x04\x5d\x8a\xb5\x40\x12"
+ "\x04\x62\x3d\xe4\x19\x8a\xeb\xb3"
+ "\xf9\xa3\x7d\xb6\xeb\x57\xf9\xb8"
+ "\x7f\xa8\xfa\x2d\x75\x2d",
+ .ctext = "\x46\x6d\xe5\x35\x5d\x22\x42\x33"
+ "\xf7\xb8\xfb\xc0\xcb\x18\xad\xa4"
+ "\x75\x6c\xc6\x38\xbb\xd4\xa1\x32"
+ "\x00\x05\x06\xd9\xc9\x17\xd9\x4f"
+ "\x1a\xf6\x24\x64\x27\x8a\x4a\xad"
+ "\x88\xa0\x86\xb7\xf9\x33\xaf\xa8"
+ "\x0e\x83\xd8\x0e\x88\xa2\x81\x79"
+ "\x65\x2e\x3e\x84\xaf\xa1\x46\x7d"
+ "\xa9\x91\xf8\x17\x82\x8d",
+ .len = 70,
+ }, { /* 4 blocks + 23 bytes */
+ .key = "\x48\x09\xab\x48\xd6\xca\x7d\xb1"
+ "\x90\xa0\x00\xd8\x33\x8a\x20\x79"
+ "\x7c\xbc\x0c\x0c\x5f\x41\xbc\xbc"
+ "\x82\xaf\x41\x81\x23\x93\xcb\xc7"
+ "\x61\x7b\x83\x13\x16\xb1\x3e\x7c"
+ "\xcc\xae\xda\xca\x78\xc7\xab\x18"
+ "\x69\xb6\x58\x3e\x5c\x19\x5f\xed"
+ "\x7b\xcf\x70\xb9\x76\x00\xd8\xc9",
+ .klen = 64,
+ .iv = "\x2e\x20\x36\xf4\xa3\x22\x5d\xd8"
+ "\x38\x49\x82\xbf\x6c\x56\xd9\x3b",
+ .ptext = "\x79\x3c\x73\x99\x65\x21\xe1\xb9"
+ "\xa0\xfd\x22\xb2\x57\xc0\x7f\xf4"
+ "\x7f\x97\x36\xaf\xf8\x8d\x73\xe1"
+ "\x0d\x85\xe9\xd5\x3d\x82\xb3\x49"
+ "\x89\x25\x30\x1f\x0d\xca\x5c\x95"
+ "\x64\x31\x02\x17\x11\x08\x8f\x32"
+ "\xbc\x37\x23\x4f\x03\x98\x91\x4a"
+ "\x50\xe2\x58\xa8\x9b\x64\x09\xe0"
+ "\xce\x99\xc9\xb0\xa8\x21\x73\xb7"
+ "\x2d\x4b\x19\xba\x81\x83\x99\xce"
+ "\xa0\x7a\xd0\x9f\x27\xf6\x8a",
+ .ctext = "\xf9\x12\x76\x21\x06\x1e\xe4\x4b"
+ "\xf9\x94\x38\x29\x0f\xee\xcb\x13"
+ "\xa3\xc3\x50\xe3\xc6\x29\x9d\xcf"
+ "\x6f\x6a\x0a\x25\xab\x44\xf6\xe4"
+ "\x71\x29\x75\x3b\x07\x1c\xfc\x1a"
+ "\x75\xd4\x84\x58\x7f\xc4\xf3\xf7"
+ "\x8f\x7c\x7a\xdc\xa2\xa3\x95\x38"
+ "\x15\xdf\x3b\x9c\xdd\x24\xb4\x0b"
+ "\xa8\x97\xfa\x5f\xee\x58\x00\x0d"
+ "\x23\xc9\x8d\xee\xc2\x3f\x27\xd8"
+ "\xd4\x43\xa5\xf8\x25\x71\x3f",
+ .len = 87,
+ }, { /* 5 blocks + 24 bytes */
+ .key = "\x8c\xf4\x4c\xe5\x91\x8f\x72\xe9"
+ "\x2f\xf8\xc0\x3c\x87\x76\x16\xa4"
+ "\x20\xab\x66\x39\x34\x10\xd6\x91"
+ "\xf1\x99\x2c\xf1\xd6\xc3\xda\x38"
+ "\xed\x2a\x4c\x80\xf4\xa5\x56\x28"
+ "\x1a\x1c\x79\x72\x6c\x93\x08\x86"
+ "\x8f\x8a\xaa\xcd\xf1\x8c\xca\xe7"
+ "\x0a\xe8\xee\x0c\x1c\xc2\xa8\xea",
+ .klen = 64,
+ .iv = "\x9a\x9e\xbc\xe4\xc9\xf3\xef\x9f"
+ "\xff\x82\x0e\x22\x8f\x80\x42\x76",
+ .ptext = "\xc1\xde\x66\x1a\x7e\x60\xd3\x3b"
+ "\x66\xd6\x29\x86\x99\xc6\xd7\xc8"
+ "\x29\xbf\x00\x57\xab\x21\x06\x24"
+ "\xd0\x92\xef\xe6\xb5\x1e\x20\xb9"
+ "\xb7\x7b\xd7\x18\x88\xf8\xd7\xe3"
+ "\x90\x61\xcd\x73\x2b\xa1\xb5\xc7"
+ "\x33\xef\xb5\xf2\x45\xf6\x92\x53"
+ "\x91\x98\xf8\x5a\x20\x75\x4c\xa8"
+ "\xf1\xf6\x01\x26\xbc\xba\x4c\xac"
+ "\xcb\xc2\x6d\xb6\x2c\x3c\x38\x61"
+ "\xe3\x98\x7f\x3e\x98\xbd\xec\xce"
+ "\xc0\xb5\x74\x23\x43\x24\x7b\x7e"
+ "\x3f\xed\xcb\xda\x88\x67\x6f\x9a",
+ .ctext = "\xeb\xdc\x6a\xb7\xd9\x5f\xa7\xfc"
+ "\x48\x75\x10\xef\xca\x65\xdc\x88"
+ "\xd0\x23\xde\x17\x5f\x3b\x61\xa2"
+ "\x15\x13\x81\x81\xf8\x57\x8b\x2a"
+ "\xe2\xc8\x49\xd1\xba\xed\xd6\xcb"
+ "\xed\x6f\x26\x69\x9b\xd2\xd2\x91"
+ "\x4e\xd7\x81\x20\x66\x38\x0c\x62"
+ "\x60\xcd\x01\x36\x97\x22\xf0\x5c"
+ "\xcf\x53\xc6\x58\xf5\x8b\x48\x0c"
+ "\xa5\x50\xc2\x73\xf9\x70\x60\x09"
+ "\x22\x69\xf3\x71\x74\x5d\xc9\xa0"
+ "\x9c\x79\xf9\xc4\x87\xac\xd7\x4b"
+ "\xac\x3c\xc6\xda\x81\x7a\xdd\x14",
+ .len = 104,
+ }, { /* 8 blocks + 25 bytes */
+ .key = "\x70\x18\x09\x93\x10\x3a\x0c\xa9"
+ "\x02\x0b\x11\x10\xae\x34\x98\xdb"
+ "\x10\xb5\xee\x8c\x49\xbc\x52\x8e"
+ "\x4b\xf7\x0a\x36\x16\x8a\xf7\x06"
+ "\xb5\x94\x52\x54\xb9\xc1\x4d\x20"
+ "\xa2\xf0\x6e\x19\x7f\x67\x1e\xaa"
+ "\x94\x6c\xee\x54\x19\xfc\x96\x95"
+ "\x04\x85\x00\x53\x7c\x39\x5f\xeb",
+ .klen = 64,
+ .iv = "\x36\x87\x8f\x9d\x74\xe9\x52\xfb"
+ "\xe1\x76\x16\x99\x61\x86\xec\x8f",
+ .ptext = "\x95\x08\xee\xfe\x87\xb2\x4f\x93"
+ "\x01\xee\xf3\x77\x0d\xbb\xfb\x26"
+ "\x3e\xb3\x34\x20\xee\x51\xd6\x40"
+ "\xb1\x64\xae\xd9\xfd\x71\x8f\x93"
+ "\xa5\x85\xff\x74\xcc\xd3\xfd\x5e"
+ "\xc2\xfc\x49\xda\xa8\x3a\x94\x29"
+ "\xa2\x59\x90\x34\x26\xbb\xa0\x34"
+ "\x5d\x47\x33\xf2\xa8\x77\x90\x98"
+ "\x8d\xfd\x38\x60\x23\x1e\x50\xa1"
+ "\x67\x4d\x8d\x09\xe0\x7d\x30\xe3"
+ "\xdd\x39\x91\xd4\x70\x68\xbb\x06"
+ "\x4e\x11\xb2\x26\x0a\x85\x73\xf6"
+ "\x37\xb6\x15\xd0\x77\xee\x43\x7b"
+ "\x77\x13\xe9\xb9\x84\x2b\x34\xab"
+ "\x49\xc1\x27\x91\x2e\xa3\xca\xe5"
+ "\xa7\x79\x45\xba\x36\x97\x49\x44"
+ "\xf7\x57\x9b\xd7\xac\xb3\xfd\x6a"
+ "\x1c\xd1\xfc\x1c\xdf\x6f\x94\xac"
+ "\x95\xf4\x50\x7a\xc8\xc3\x8c\x60"
+ "\x3c",
+ .ctext = "\xb6\xc8\xf9\x5d\x35\x5a\x0a\x33"
+ "\x2b\xd3\x5a\x18\x09\x1c\x1b\x0b"
+ "\x2a\x0e\xde\xf6\x0d\x04\xa6\xb3"
+ "\xa8\xe8\x1b\x86\x29\x58\x75\x56"
+ "\xab\xab\xbf\xbe\x1f\xb4\xc4\xf3"
+ "\xde\x1a\xb0\x87\x69\xac\x5b\x0c"
+ "\x1b\xb7\xc7\x24\xa4\x47\xe7\x81"
+ "\x2c\x0a\x82\xf9\x18\x5d\xe6\x09"
+ "\xe3\x65\x36\x54\x3d\x8a\x3a\x64"
+ "\x34\xf4\x34\x7f\x26\x3c\x1e\x3b"
+ "\x5a\x13\xdf\x7f\xa8\x2d\x81\xce"
+ "\xfa\xad\xd0\xb1\xca\xfa\xc3\x55"
+ "\x94\xc8\xb8\x16\x7e\xff\x44\x88"
+ "\xb4\x47\x4b\xfe\xda\x60\x68\x2e"
+ "\xfc\x70\xb5\xe3\xf3\xe9\x46\x22"
+ "\x1d\x98\x66\x09\x0f\xed\xbb\x20"
+ "\x7b\x8c\x2a\xff\x45\x62\xde\x9b"
+ "\x20\x2e\x6c\xb4\xe4\x26\x03\x72"
+ "\x8a\xb4\x19\xc9\xb1\xcf\x9d\x86"
+ "\xa3",
+ .len = 153,
+ }, { /* 0 blocks + 26 bytes */
+ .key = "\x5a\x38\x3f\x9c\x0c\x53\x17\x6c"
+ "\x60\x72\x23\x26\xba\xfe\xa1\xb7"
+ "\x03\xa8\xfe\xa0\x7c\xff\x78\x4c"
+ "\x7d\x84\x2f\x24\x84\x77\xec\x6f"
+ "\x88\xc8\x36\xe2\xcb\x52\x3c\xb4"
+ "\x39\xac\x37\xfa\x41\x8b\xc4\x59"
+ "\x24\x03\xe1\x51\xc9\x54\x7d\xb7"
+ "\xa3\xde\x91\x44\x8d\x16\x97\x22",
+ .klen = 64,
+ .iv = "\xfb\x7f\x3d\x60\x26\x0a\x3a\x3d"
+ "\xa5\xa3\x45\xf2\x24\x67\xfa\x6e",
+ .ptext = "\xfb\x56\x97\x65\x7c\xd8\x6c\x3c"
+ "\x5d\xd3\xea\xa6\xa4\x83\xf7\x9d"
+ "\x9d\x89\x2c\x85\xb8\xd9\xd4\xf0"
+ "\x1a\xad",
+ .ctext = "\xc9\x9b\x4b\xf2\xf7\x0f\x23\xfe"
+ "\xc3\x93\x88\xa1\xb3\x88\xab\xd6"
+ "\x26\x78\x82\xa6\x6b\x0b\x76\xad"
+ "\x21\x5e",
+ .len = 26,
+ }, { /* 0 blocks + 27 bytes */
+ .key = "\xc0\xcf\x57\xa2\x3c\xa2\x4b\xf6"
+ "\x5d\x36\x7b\xd7\x1d\x16\xc3\x2f"
+ "\x50\xc6\x0a\xb2\xfd\xe8\x24\xfc"
+ "\x33\xcf\x73\xfd\xe0\xe9\xa5\xd1"
+ "\x98\xfc\xd6\x16\xdd\xfd\x6d\xab"
+ "\x44\xbc\x37\x9d\xab\x5b\x1d\xf2"
+ "\x6f\x5d\xbe\x6b\x14\x14\xc7\x74"
+ "\xbb\x91\x24\x4b\x52\xcb\x78\x31",
+ .klen = 64,
+ .iv = "\x5c\xc1\x3d\xb6\xa1\x6a\x2d\x1f"
+ "\xee\x75\x19\x4b\x04\xfa\xe1\x7e",
+ .ptext = "\x02\x95\x3a\xab\xac\x3b\xcd\xcd"
+ "\x63\xc7\x4c\x7c\xe5\x75\xee\x03"
+ "\x94\xc7\xff\xe8\xe0\xe9\x86\x2a"
+ "\xd3\xc7\xe4",
+ .ctext = "\x8e\x84\x76\x8b\xc1\x47\x55\x15"
+ "\x5e\x51\xb3\xe2\x3f\x72\x4d\x20"
+ "\x09\x3f\x4f\xb1\xce\xf4\xb0\x14"
+ "\xf6\xa7\xb3",
+ .len = 27,
+ }, { /* 0 blocks + 28 bytes */
+ .key = "\x0b\x5b\x1d\xc8\xb1\x3f\x8f\xcd"
+ "\x87\xd2\x58\x28\x36\xc6\x34\xfb"
+ "\x04\xe8\xf1\xb7\x91\x30\xda\x75"
+ "\x66\x4a\x72\x90\x09\x39\x02\x19"
+ "\x62\x2d\xe9\x24\x95\x0e\x87\x43"
+ "\x4c\xc7\x96\xe4\xc9\x31\x6a\x13"
+ "\x16\x10\xef\x34\x9b\x98\x19\xf1"
+ "\x8b\x14\x38\x3f\xf8\x75\xcc\x76",
+ .klen = 64,
+ .iv = "\x0c\x2c\x55\x2c\xda\x40\xe1\xab"
+ "\xa6\x34\x66\x7a\xa4\xa3\xda\x90",
+ .ptext = "\xbe\x84\xd3\xfe\xe6\xb4\x29\x67"
+ "\xfd\x29\x78\x41\x3d\xe9\x81\x4e"
+ "\x3c\xf9\xf4\xf5\x3f\xd8\x0e\xcd"
+ "\x63\x73\x65\xf3",
+ .ctext = "\xd0\xa0\x16\x5f\xf9\x85\xd0\x63"
+ "\x9b\x81\xa1\x15\x93\xb3\x62\x36"
+ "\xec\x93\x0e\x14\x07\xf2\xa9\x38"
+ "\x80\x33\xc0\x20",
+ .len = 28,
+ }, { /* 0 blocks + 29 bytes */
+ .key = "\xdc\x4c\xdc\x20\xb1\x34\x89\xa4"
+ "\xd0\xb6\x77\x05\xea\x0c\xcc\x68"
+ "\xb1\xd6\xf7\xfd\xa7\x0a\x5b\x81"
+ "\x2d\x4d\xa3\x65\xd0\xab\xa1\x02"
+ "\x85\x4b\x33\xea\x51\x16\x50\x12"
+ "\x3b\x25\xba\x13\xba\x7c\xbb\x3a"
+ "\xe4\xfd\xb3\x9c\x88\x8b\xb8\x30"
+ "\x7a\x97\xcf\x95\x5d\x69\x7b\x1d",
+ .klen = 64,
+ .iv = "\xe7\x69\xed\xd2\x54\x5d\x4a\x29"
+ "\xb2\xd7\x60\x90\xa0\x0b\x0d\x3a",
+ .ptext = "\x37\x22\x11\x62\xa0\x74\x92\x62"
+ "\x40\x4e\x2b\x0a\x8b\xab\xd8\x28"
+ "\x8a\xd2\xeb\xa5\x8e\xe1\x42\xc8"
+ "\x49\xef\x9a\xec\x1b",
+ .ctext = "\x7c\x66\x72\x6b\xe3\xc3\x57\x71"
+ "\x37\x13\xce\x1f\x6b\xff\x13\x87"
+ "\x65\xa7\xa1\xc5\x23\x7f\xca\x40"
+ "\x82\xbf\x2f\xc0\x2a",
+ .len = 29,
+ }, { /* 0 blocks + 30 bytes */
+ .key = "\x72\x9a\xf5\x53\x55\xdd\x0f\xef"
+ "\xfc\x75\x6f\x03\x88\xc8\xba\x88"
+ "\xb7\x65\x89\x5d\x03\x86\x21\x22"
+ "\xb8\x42\x87\xd9\xa9\x83\x9e\x9c"
+ "\xca\x28\xa1\xd2\xb6\xd0\xa6\x6c"
+ "\xf8\x57\x42\x7c\x73\xfc\x7b\x0a"
+ "\xbc\x3c\x57\x7b\x5a\x39\x61\x55"
+ "\xb7\x25\xe9\xf1\xc4\xbb\x04\x28",
+ .klen = 64,
+ .iv = "\x8a\x38\x22\xba\xea\x5e\x1d\xa4"
+ "\x31\x18\x12\x5c\x56\x0c\x12\x50",
+ .ptext = "\x06\xfd\xbb\xa9\x2e\x56\x05\x5f"
+ "\xf2\xa7\x36\x76\x26\xd3\xb3\x49"
+ "\x7c\xe2\xe3\xbe\x1f\x65\xd2\x17"
+ "\x65\xe2\xb3\x0e\xb1\x93",
+ .ctext = "\xae\x1f\x19\x7e\x3b\xb3\x65\xcb"
+ "\x14\x70\x6b\x3c\xa0\x63\x95\x94"
+ "\x56\x52\xe1\xb4\x14\xca\x21\x13"
+ "\xb5\x03\x3f\xfe\xc9\x9f",
+ .len = 30,
+ }, { /* 0 blocks + 31 bytes */
+ .key = "\xce\x06\x45\x53\x25\x81\xd2\xb2"
+ "\xdd\xc9\x57\xfe\xbb\xf6\x83\x07"
+ "\x28\xd8\x2a\xff\x53\xf8\x57\xc6"
+ "\x63\x50\xd4\x3e\x2a\x54\x37\x51"
+ "\x07\x3b\x23\x63\x3c\x31\x57\x0d"
+ "\xd3\x59\x20\xf2\xd0\x85\xac\xc5"
+ "\x3f\xa1\x74\x90\x0a\x3f\xf4\x10"
+ "\x12\xf0\x1b\x2b\xef\xcb\x86\x74",
+ .klen = 64,
+ .iv = "\x6d\x3e\x62\x94\x75\x43\x74\xea"
+ "\xed\x4a\xa6\xde\xba\x55\x83\x38",
+ .ptext = "\x6a\xe6\xa3\x66\x7e\x78\xef\x42"
+ "\x8b\x28\x08\x24\xda\xd4\xd6\x42"
+ "\x3d\xb6\x48\x7e\x51\xa6\x92\x65"
+ "\x98\x86\x26\x98\x37\x42\xa5",
+ .ctext = "\x64\xc6\xfc\x60\x21\x87\x7a\xf5"
+ "\xc3\x1d\xba\x41\x3c\x9c\x8c\xe8"
+ "\x2d\x93\xf0\x02\x95\x6d\xfe\x8d"
+ "\x68\x17\x05\x75\xc0\xd3\xa8",
+ .len = 31,
}
};
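Each of the vectors above fills in the same handful of cipher_testvec
fields. The sketch below only lists the members used here and is not a
verbatim copy of the definition in crypto/testmgr.h (the real struct has
more members, and the exact types may differ between kernel versions):

struct cipher_testvec {                 /* simplified sketch */
        const char *key;                /* XTS key: two AES keys back to back */
        const char *iv;                 /* initial tweak */
        const char *ptext;              /* plaintext */
        const char *ctext;              /* expected ciphertext */
        unsigned int klen;              /* key length in bytes; 64 = 2 x AES-256 */
        unsigned int len;               /* ptext/ctext length in bytes */
};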
--
2.17.1
* Re: [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
` (16 preceding siblings ...)
2019-09-03 16:43 ` [PATCH v2 17/17] crypto: testmgr - Add additional AES-XTS vectors for covering CTS Ard Biesheuvel
@ 2019-09-09 7:52 ` Herbert Xu
17 siblings, 0 replies; 19+ messages in thread
From: Herbert Xu @ 2019-09-09 7:52 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: linux-crypto, ebiggers
On Tue, Sep 03, 2019 at 09:43:22AM -0700, Ard Biesheuvel wrote:
> This is a collection of improvements for the ARM and arm64 implementations
> of the AES based skciphers.
>
> NOTES:
> - the last two patches add XTS ciphertext stealing test vectors and should
> NOT be merged until all AES-XTS implementations have been confirmed to work
> - tested for correctness [on both QEMU and actual hardware (via kernelci)] but
> not for performance regressions
>
> The most important part of this series is the implementation of ciphertext
> stealing support for the XTS skciphers. The CE and NEON bit slicing based
> code for both ARM and arm64 is updated to handle inputs of any length >= the
> XTS block size of 16 bytes.
>
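For reference, the sketch below illustrates the ciphertext stealing
scheme the series implements, shown for the encrypt direction. It is
purely illustrative and is not the kernel code: xts_block_enc() is a
hypothetical stand-in for a single AES-XTS block operation (xor with
the tweak for block j, AES-encrypt, xor with the tweak again), whereas
the real implementations live in the assembler and glue files listed in
the diffstat further down.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define XTS_BLOCK_SIZE  16

/* hypothetical stand-in: encrypt one block under the tweak for index j */
void xts_block_enc(uint8_t dst[XTS_BLOCK_SIZE],
                   const uint8_t src[XTS_BLOCK_SIZE], size_t j);

/* encrypt len >= XTS_BLOCK_SIZE bytes, stealing for a partial tail */
static void xts_encrypt_cts(uint8_t *dst, const uint8_t *src, size_t len)
{
        size_t tail = len % XTS_BLOCK_SIZE;
        size_t nfull = len / XTS_BLOCK_SIZE;
        uint8_t cc[XTS_BLOCK_SIZE], pp[XTS_BLOCK_SIZE];
        size_t j;

        if (tail)
                nfull--;        /* the last full block is handled below */

        for (j = 0; j < nfull; j++)
                xts_block_enc(dst + j * XTS_BLOCK_SIZE,
                              src + j * XTS_BLOCK_SIZE, j);

        if (!tail)
                return;         /* multiple of the block size: all done */

        /* encrypt the last full plaintext block as usual */
        xts_block_enc(cc, src + nfull * XTS_BLOCK_SIZE, nfull);

        /* its leading bytes become the short final ciphertext block */
        memcpy(dst + len - tail, cc, tail);

        /* pad the partial plaintext with the leftover ciphertext bytes */
        memcpy(pp, src + len - tail, tail);
        memcpy(pp + tail, cc + tail, XTS_BLOCK_SIZE - tail);

        /* and encrypt that with the next tweak: the penultimate block */
        xts_block_enc(dst + nfull * XTS_BLOCK_SIZE, pp, nfull + 1);
}

The decrypt direction mirrors this, but has to apply the tweak for the
final index to the second-to-last ciphertext block before reusing the
previous tweak, so the tweaks are consumed out of order; that is why
the stolen-tail case needs its own path in each implementation.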
> It also updates the arm64 CTS/CBC implementation not to use a request ctx
> structure, and ports the resulting implementation to ARM as well.
>
> The remaining patches are cleanups and minor improvements in the 'ongoing
> maintenance' category. None of these are -stable candidates AFAICT.
>
> Changes since v1:
> - simplify skcipher_walk_abort() - pass -ECANCELED instead of walk->nbytes into
> skcipher_walk_done() so that the latter does not require any changes (#8)
> - rebased onto cryptodev/master
>
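A sketch of the abort helper the first bullet describes, assuming it
ends up as a static inline in include/crypto/internal/skcipher.h (the
file the diffstat below shows growing by a few lines); the point of
passing -ECANCELED is that skcipher_walk_done() already unwinds the
walk on error, so no new logic is needed there:

static inline void skcipher_walk_abort(struct skcipher_walk *walk)
{
        skcipher_walk_done(walk, -ECANCELED);
}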
>
> Ard Biesheuvel (16):
> crypto: arm/aes - fix round key prototypes
> crypto: arm/aes-ce - yield the SIMD unit between scatterwalk steps
> crypto: arm/aes-ce - switch to 4x interleave
> crypto: arm/aes-ce - replace tweak mask literal with composition
> crypto: arm/aes-neonbs - replace tweak mask literal with composition
> crypto: arm64/aes-neonbs - replace tweak mask literal with composition
> crypto: arm64/aes-neon - limit exposed routines if faster driver is
> enabled
> crypto: skcipher - add the ability to abort a skcipher walk
> crypto: arm64/aes-cts-cbc-ce - performance tweak
> crypto: arm64/aes-cts-cbc - move request context data to the stack
> crypto: arm64/aes - implement support for XTS ciphertext stealing
> crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS
> crypto: arm/aes-ce - implement ciphertext stealing for XTS
> crypto: arm/aes-neonbs - implement ciphertext stealing for XTS
> crypto: arm/aes-ce - implement ciphertext stealing for CBC
> crypto: testmgr - add test vectors for XTS ciphertext stealing
>
> Pascal van Leeuwen (1):
> crypto: testmgr - Add additional AES-XTS vectors for covering CTS
>
> arch/arm/crypto/aes-ce-core.S | 462 ++++++++++++++------
> arch/arm/crypto/aes-ce-glue.c | 377 +++++++++++++---
> arch/arm/crypto/aes-neonbs-core.S | 24 +-
> arch/arm/crypto/aes-neonbs-glue.c | 91 +++-
> arch/arm64/crypto/aes-ce.S | 3 +
> arch/arm64/crypto/aes-glue.c | 299 ++++++++-----
> arch/arm64/crypto/aes-modes.S | 107 ++++-
> arch/arm64/crypto/aes-neon.S | 5 +
> arch/arm64/crypto/aes-neonbs-core.S | 9 +-
> arch/arm64/crypto/aes-neonbs-glue.c | 111 ++++-
> crypto/testmgr.h | 368 ++++++++++++++++
> include/crypto/internal/skcipher.h | 5 +
> 12 files changed, 1495 insertions(+), 366 deletions(-)
Patches 1-15 applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt