From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: linux-crypto@vger.kernel.org
Cc: herbert@gondor.apana.org.au, ebiggers@kernel.org,
Ard Biesheuvel <ard.biesheuvel@linaro.org>
Subject: [PATCH v2 13/17] crypto: arm/aes-ce - implement ciphertext stealing for XTS
Date: Tue, 3 Sep 2019 09:43:35 -0700 [thread overview]
Message-ID: <20190903164339.27984-14-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20190903164339.27984-1-ard.biesheuvel@linaro.org>
Update the AES-XTS implementation based on AES instructions so that it
can deal with inputs whose size is not a multiple of the cipher block
size. This is part of the original XTS specification, but was never
implemented before in the Linux kernel.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-ce-core.S | 103 ++++++++++++++--
arch/arm/crypto/aes-ce-glue.c | 128 ++++++++++++++++++--
2 files changed, 208 insertions(+), 23 deletions(-)
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index bb6ec1844370..763e51604ab6 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -369,9 +369,9 @@ ENDPROC(ce_aes_ctr_encrypt)
/*
* aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
- * int blocks, u8 iv[], u32 const rk2[], int first)
+ * int bytes, u8 iv[], u32 const rk2[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
- * int blocks, u8 iv[], u32 const rk2[], int first)
+ * int bytes, u8 iv[], u32 const rk2[], int first)
*/
.macro next_tweak, out, in, const, tmp
@@ -414,7 +414,7 @@ ENTRY(ce_aes_xts_encrypt)
.Lxtsencloop4x:
next_tweak q4, q4, q15, q10
.Lxtsenc4x:
- subs r4, r4, #4
+ subs r4, r4, #64
bmi .Lxtsenc1x
vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks
vld1.8 {q2-q3}, [r1]!
@@ -434,24 +434,58 @@ ENTRY(ce_aes_xts_encrypt)
vst1.8 {q2-q3}, [r0]!
vmov q4, q7
teq r4, #0
- beq .Lxtsencout
+ beq .Lxtsencret
b .Lxtsencloop4x
.Lxtsenc1x:
- adds r4, r4, #4
+ adds r4, r4, #64
beq .Lxtsencout
+ subs r4, r4, #16
+ bmi .LxtsencctsNx
.Lxtsencloop:
vld1.8 {q0}, [r1]!
+.Lxtsencctsout:
veor q0, q0, q4
bl aes_encrypt
veor q0, q0, q4
- vst1.8 {q0}, [r0]!
- subs r4, r4, #1
+ teq r4, #0
beq .Lxtsencout
+ subs r4, r4, #16
next_tweak q4, q4, q15, q6
+ bmi .Lxtsenccts
+ vst1.8 {q0}, [r0]!
b .Lxtsencloop
.Lxtsencout:
+ vst1.8 {q0}, [r0]
+.Lxtsencret:
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
+
+.LxtsencctsNx:
+ vmov q0, q3
+ sub r0, r0, #16
+.Lxtsenccts:
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+
+ add r1, r1, r4 @ rewind input pointer
+ add r4, r4, #16 @ # bytes in final block
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ add r4, r0, r4 @ output address of final block
+
+ vld1.8 {q1}, [r1] @ load final partial block
+ vld1.8 {q2}, [ip]
+ vld1.8 {q3}, [lr]
+
+ vtbl.8 d4, {d0-d1}, d4
+ vtbl.8 d5, {d0-d1}, d5
+ vtbx.8 d0, {d2-d3}, d6
+ vtbx.8 d1, {d2-d3}, d7
+
+ vst1.8 {q2}, [r4] @ overlapping stores
+ mov r4, #0
+ b .Lxtsencctsout
ENDPROC(ce_aes_xts_encrypt)
@@ -462,13 +496,17 @@ ENTRY(ce_aes_xts_decrypt)
prepare_key r2, r3
vmov q4, q0
+ /* subtract 16 bytes if we are doing CTS */
+ tst r4, #0xf
+ subne r4, r4, #0x10
+
teq r6, #0 @ start of a block?
bne .Lxtsdec4x
.Lxtsdecloop4x:
next_tweak q4, q4, q15, q10
.Lxtsdec4x:
- subs r4, r4, #4
+ subs r4, r4, #64
bmi .Lxtsdec1x
vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks
vld1.8 {q2-q3}, [r1]!
@@ -491,22 +529,55 @@ ENTRY(ce_aes_xts_decrypt)
beq .Lxtsdecout
b .Lxtsdecloop4x
.Lxtsdec1x:
- adds r4, r4, #4
+ adds r4, r4, #64
beq .Lxtsdecout
+ subs r4, r4, #16
.Lxtsdecloop:
vld1.8 {q0}, [r1]!
+ bmi .Lxtsdeccts
+.Lxtsdecctsout:
veor q0, q0, q4
- add ip, r2, #32 @ 3rd round key
bl aes_decrypt
veor q0, q0, q4
vst1.8 {q0}, [r0]!
- subs r4, r4, #1
+ teq r4, #0
beq .Lxtsdecout
+ subs r4, r4, #16
next_tweak q4, q4, q15, q6
b .Lxtsdecloop
.Lxtsdecout:
vst1.8 {q4}, [r5]
pop {r4-r6, pc}
+
+.Lxtsdeccts:
+ movw ip, :lower16:.Lcts_permute_table
+ movt ip, :upper16:.Lcts_permute_table
+
+ add r1, r1, r4 @ rewind input pointer
+ add r4, r4, #16 @ # bytes in final block
+ add lr, ip, #32
+ add ip, ip, r4
+ sub lr, lr, r4
+ add r4, r0, r4 @ output address of final block
+
+ next_tweak q5, q4, q15, q6
+
+ vld1.8 {q1}, [r1] @ load final partial block
+ vld1.8 {q2}, [ip]
+ vld1.8 {q3}, [lr]
+
+ veor q0, q0, q5
+ bl aes_decrypt
+ veor q0, q0, q5
+
+ vtbl.8 d4, {d0-d1}, d4
+ vtbl.8 d5, {d0-d1}, d5
+ vtbx.8 d0, {d2-d3}, d6
+ vtbx.8 d1, {d2-d3}, d7
+
+ vst1.8 {q2}, [r4] @ overlapping stores
+ mov r4, #0
+ b .Lxtsdecctsout
ENDPROC(ce_aes_xts_decrypt)
/*
@@ -532,3 +603,13 @@ ENTRY(ce_aes_invert)
vst1.32 {q0}, [r0]
bx lr
ENDPROC(ce_aes_invert)
+
+ .section ".rodata", "a"
+ .align 6
+.Lcts_permute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 486e862ae34a..c215792a2494 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -13,6 +13,7 @@
#include <crypto/ctr.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/module.h>
#include <crypto/xts.h>
@@ -39,10 +40,10 @@ asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
int rounds, int blocks, u8 ctr[]);
asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u8 iv[],
+ int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
- int rounds, int blocks, u8 iv[],
+ int rounds, int bytes, u8 iv[],
u32 const rk2[], int first);
struct aes_block {
@@ -317,20 +318,71 @@ static int xts_encrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_enc, rounds, blocks, walk.iv,
+ ctx->key1.key_enc, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_enc, rounds, walk.nbytes, walk.iv,
+ ctx->key2.key_enc, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static int xts_decrypt(struct skcipher_request *req)
@@ -338,20 +390,71 @@ static int xts_decrypt(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err, first, rounds = num_rounds(&ctx->key1);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct scatterlist *src, *dst;
struct skcipher_walk walk;
- unsigned int blocks;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
err = skcipher_walk_virt(&walk, req, false);
- for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+ AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ xts_blocks * AES_BLOCK_SIZE,
+ req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+ int nbytes = walk.nbytes;
+
+ if (walk.nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
kernel_neon_begin();
ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key1.key_dec, rounds, blocks, walk.iv,
+ ctx->key1.key_dec, rounds, nbytes, walk.iv,
ctx->key2.key_enc, first);
kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
- return err;
+
+ if (err || likely(!tail))
+ return err;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key1.key_dec, rounds, walk.nbytes, walk.iv,
+ ctx->key2.key_enc, first);
+ kernel_neon_end();
+
+ return skcipher_walk_done(&walk, 0);
}
static struct skcipher_alg aes_algs[] = { {
@@ -426,6 +529,7 @@ static struct skcipher_alg aes_algs[] = { {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_set_key,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
--
2.17.1
next prev parent reply other threads:[~2019-09-03 16:44 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-03 16:43 [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 01/17] crypto: arm/aes - fix round key prototypes Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 02/17] crypto: arm/aes-ce - yield the SIMD unit between scatterwalk steps Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 03/17] crypto: arm/aes-ce - switch to 4x interleave Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 04/17] crypto: arm/aes-ce - replace tweak mask literal with composition Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 05/17] crypto: arm/aes-neonbs " Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 06/17] crypto: arm64/aes-neonbs " Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 07/17] crypto: arm64/aes-neon - limit exposed routines if faster driver is enabled Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 08/17] crypto: skcipher - add the ability to abort a skcipher walk Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 09/17] crypto: arm64/aes-cts-cbc-ce - performance tweak Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 10/17] crypto: arm64/aes-cts-cbc - move request context data to the stack Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 11/17] crypto: arm64/aes - implement support for XTS ciphertext stealing Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 12/17] crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS Ard Biesheuvel
2019-09-03 16:43 ` Ard Biesheuvel [this message]
2019-09-03 16:43 ` [PATCH v2 14/17] crypto: arm/aes-neonbs " Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 15/17] crypto: arm/aes-ce - implement ciphertext stealing for CBC Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 16/17] crypto: testmgr - add test vectors for XTS ciphertext stealing Ard Biesheuvel
2019-09-03 16:43 ` [PATCH v2 17/17] crypto: testmgr - Add additional AES-XTS vectors for covering CTS Ard Biesheuvel
2019-09-09 7:52 ` [PATCH v2 00/17] crypto: arm/aes - XTS ciphertext stealing and other updates Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190903164339.27984-14-ard.biesheuvel@linaro.org \
--to=ard.biesheuvel@linaro.org \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).