* [PATCH 1/2] crypto: aesni - replace CTR function pointer with static call
2021-01-16 16:48 [PATCH 0/2] crypto: aesni - fix more FPU handling and indirect call issues Ard Biesheuvel
@ 2021-01-16 16:48 ` Ard Biesheuvel
2021-01-16 16:48 ` [PATCH 2/2] crypto: aesni - release FPU during skcipher walk API calls Ard Biesheuvel
2021-01-22 6:21 ` [PATCH 0/2] crypto: aesni - fix more FPU handling and indirect call issues Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Ard Biesheuvel @ 2021-01-16 16:48 UTC (permalink / raw)
To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu
Indirect calls are very expensive on x86, so use a static call to set
the system-wide AES-NI/CTR asm helper.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/crypto/aesni-intel_glue.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index a548fdbc3073..d96685457196 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -34,6 +34,7 @@
#include <linux/jump_label.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
+#include <linux/static_call.h>
#define AESNI_ALIGN 16
@@ -107,10 +108,9 @@ asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
#ifdef CONFIG_X86_64
-static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
+DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);
/* Scatter / Gather routines, with args similar to above */
asmlinkage void aesni_gcm_init(void *ctx,
@@ -520,8 +520,10 @@ static int ctr_crypt(struct skcipher_request *req)
kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
+ static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
+ walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK,
+ walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
@@ -1160,10 +1162,9 @@ static int __init aesni_init(void)
} else {
pr_info("SSE version of gcm_enc/dec engaged.\n");
}
- aesni_ctr_enc_tfm = aesni_ctr_enc;
if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
- aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
+ static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
pr_info("AES CTR mode by8 optimization enabled\n");
}
#endif
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] crypto: aesni - release FPU during skcipher walk API calls
2021-01-16 16:48 [PATCH 0/2] crypto: aesni - fix more FPU handling and indirect call issues Ard Biesheuvel
2021-01-16 16:48 ` [PATCH 1/2] crypto: aesni - replace CTR function pointer with static call Ard Biesheuvel
@ 2021-01-16 16:48 ` Ard Biesheuvel
2021-01-22 6:21 ` [PATCH 0/2] crypto: aesni - fix more FPU handling and indirect call issues Herbert Xu
2 siblings, 0 replies; 4+ messages in thread
From: Ard Biesheuvel @ 2021-01-16 16:48 UTC (permalink / raw)
To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu
Taking ownership of the FPU in kernel mode disables preemption, and
this may result in excessive scheduling blackouts if the size of the
data being processed on the FPU is unbounded.
Given that taking and releasing the FPU is cheap these days on x86, we
can limit the impact of this issue easily for skcipher implementations,
by moving the FPU begin/end calls inside the skcipher walk processing
loop. Considering that skcipher walks operate on at most one page at a
time, doing so fully mitigates this issue.
This also permits the skcipher walk logic to use non-atomic kmalloc()
calls etc so we can change the 'atomic' bool argument in the calls to
skcipher_walk_virt() to false as well.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/crypto/aesni-intel_glue.c | 73 +++++++++-----------
1 file changed, 32 insertions(+), 41 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index d96685457196..2144e54a6c89 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -283,16 +283,16 @@ static int ecb_encrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -305,16 +305,16 @@ static int ecb_decrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -327,16 +327,16 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -349,16 +349,16 @@ static int cbc_decrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -476,21 +476,6 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
}
#ifdef CONFIG_X86_64
-static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
- struct skcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[AES_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- aesni_enc(ctx, keystream, ctrblk);
- crypto_xor_cpy(dst, keystream, src, nbytes);
-
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
-}
-
static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv)
{
@@ -512,27 +497,33 @@ static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ u8 keystream[AES_BLOCK_SIZE];
struct skcipher_walk walk;
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
- walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK,
- walk.iv);
- nbytes &= AES_BLOCK_SIZE - 1;
+ while ((nbytes = walk.nbytes) > 0) {
+ kernel_fpu_begin();
+ if (nbytes & AES_BLOCK_MASK)
+ static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
+ walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK,
+ walk.iv);
+ nbytes &= ~AES_BLOCK_MASK;
+
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ aesni_enc(ctx, keystream, walk.iv);
+ crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes,
+ walk.src.virt.addr + walk.nbytes - nbytes,
+ keystream, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ nbytes = 0;
+ }
+ kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
- if (walk.nbytes) {
- ctr_crypt_final(ctx, &walk);
- err = skcipher_walk_done(&walk, 0);
- }
- kernel_fpu_end();
-
return err;
}
--
2.17.1
^ permalink raw reply related [flat|nested] 4+ messages in thread