* [PATCH v2 0/5] crypto: gcm-aes-ni cleanups
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu

Clean up some issues and peculiarities in the gcm(aes-ni) driver.

Changes since v1:
- fix sleep-while-atomic issue reported by Eric
- add a patch to get rid of indirect calls, to avoid taking the retpoline
  performance hit

Cc: Megha Dey <megha.dey@intel.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>

Ard Biesheuvel (5):
  crypto: x86/gcm-aes-ni - prevent misaligned buffers on the stack
  crypto: x86/gcm-aes-ni - drop unused asm prototypes
  crypto: x86/gcm-aes-ni - clean up mapping of associated data
  crypto: x86/gcm-aes-ni - refactor scatterlist processing
  crypto: x86/gcm-aes-ni - replace function pointers with static
    branches

 arch/x86/crypto/aesni-intel_glue.c | 321 ++++++++------------
 1 file changed, 121 insertions(+), 200 deletions(-)


base-commit: 858e88e2e54cd50cd43f3a8b490b64c22ae8267b
-- 
2.17.1



* [PATCH v2 1/5] crypto: x86/gcm-aes-ni - prevent misaligned buffers on the stack
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu, stable

The GCM mode driver uses 16-byte aligned buffers on the stack to pass
the IV to the asm helpers, but unfortunately, the x86 port does not
guarantee that the stack pointer is 16-byte aligned upon entry in the
first place. Since the compiler is not aware of this, it will not emit
the additional stack realignment sequence that is needed, so the
alignment is not guaranteed to be more than 8 bytes.

So instead, allocate some padding on the stack, and realign the IV
pointer by hand.
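
For illustration, a minimal sketch of the over-allocate-and-realign
idiom applied below (AESNI_ALIGN is 16 in this driver):

  /*
   * The stack only guarantees 8-byte alignment, so ask for that plus
   * enough slack to reach the next 16-byte boundary by hand.
   */
  u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
  u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);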

Cc: <stable@vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 28 +++++++++++---------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 2116bc2b9507..880f9f8b5153 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -710,7 +710,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
-	struct gcm_context_data data AESNI_ALIGN_ATTR;
+	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
+	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
 	struct scatter_walk dst_sg_walk = {};
 	unsigned long left = req->cryptlen;
 	unsigned long len, srclen, dstlen;
@@ -759,8 +760,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	}
 
 	kernel_fpu_begin();
-	gcm_tfm->init(aes_ctx, &data, iv,
-		hash_subkey, assoc, assoclen);
+	gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
 	if (req->src != req->dst) {
 		while (left) {
 			src = scatterwalk_map(&src_sg_walk);
@@ -770,10 +770,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 			len = min(srclen, dstlen);
 			if (len) {
 				if (enc)
-					gcm_tfm->enc_update(aes_ctx, &data,
+					gcm_tfm->enc_update(aes_ctx, data,
 							     dst, src, len);
 				else
-					gcm_tfm->dec_update(aes_ctx, &data,
+					gcm_tfm->dec_update(aes_ctx, data,
 							     dst, src, len);
 			}
 			left -= len;
@@ -791,10 +791,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 			len = scatterwalk_clamp(&src_sg_walk, left);
 			if (len) {
 				if (enc)
-					gcm_tfm->enc_update(aes_ctx, &data,
+					gcm_tfm->enc_update(aes_ctx, data,
 							     src, src, len);
 				else
-					gcm_tfm->dec_update(aes_ctx, &data,
+					gcm_tfm->dec_update(aes_ctx, data,
 							     src, src, len);
 			}
 			left -= len;
@@ -803,7 +803,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 			scatterwalk_done(&src_sg_walk, 1, left);
 		}
 	}
-	gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len);
+	gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len);
 	kernel_fpu_end();
 
 	if (!assocmem)
@@ -852,7 +852,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
 	unsigned int i;
 	__be32 counter = cpu_to_be32(1);
 
@@ -879,7 +880,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
 	unsigned int i;
 
 	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
@@ -1149,7 +1151,8 @@ static int generic_gcmaes_encrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
 	__be32 counter = cpu_to_be32(1);
 
 	memcpy(iv, req->iv, 12);
@@ -1165,7 +1168,8 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
 
 	memcpy(iv, req->iv, 12);
 	*((__be32 *)(iv+12)) = counter;
-- 
2.17.1



* [PATCH v2 2/5] crypto: x86/gcm-aes-ni - drop unused asm prototypes
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu

Drop the prototypes of some asm routines that are declared but never
called.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 67 --------------------
 1 file changed, 67 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 880f9f8b5153..0f124d72e6b4 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -111,49 +111,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 
-/* asmlinkage void aesni_gcm_enc()
- * void *ctx,  AES Key schedule. Starts on a 16 byte boundary.
- * struct gcm_context_data.  May be uninitialized.
- * u8 *out, Ciphertext output. Encrypt in-place is allowed.
- * const u8 *in, Plaintext input
- * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
- *         16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes.
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
- *          Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_enc(void *ctx,
-			struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
-/* asmlinkage void aesni_gcm_dec()
- * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
- * struct gcm_context_data.  May be uninitialized.
- * u8 *out, Plaintext output. Decrypt in-place is allowed.
- * const u8 *in, Ciphertext input
- * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
- *         16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
- * to be 8 or 12 bytes
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
- * Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_dec(void *ctx,
-			struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
 /* Scatter / Gather routines, with args similar to above */
 asmlinkage void aesni_gcm_init(void *ctx,
 			       struct gcm_context_data *gdata,
@@ -218,18 +175,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx,
-				struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
-asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
-				struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
 static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
 	.init = &aesni_gcm_init_avx_gen2,
 	.enc_update = &aesni_gcm_enc_update_avx_gen2,
@@ -260,18 +205,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx,
-				struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long plaintext_len, u8 *iv,
-			const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
-asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
-				struct gcm_context_data *gdata, u8 *out,
-			const u8 *in, unsigned long ciphertext_len, u8 *iv,
-			const u8 *aad, unsigned long aad_len,
-			u8 *auth_tag, unsigned long auth_tag_len);
-
 static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
 	.init = &aesni_gcm_init_avx_gen4,
 	.enc_update = &aesni_gcm_enc_update_avx_gen4,
-- 
2.17.1



* [PATCH v2 3/5] crypto: x86/gcm-aes-ni - clean up mapping of associated data
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu

The gcm(aes-ni) driver is only built for x86_64, which does not make
use of highmem. So testing for PageHighMem is pointless and can be
omitted.

While at it, replace the hardcoded GFP_ATOMIC with a value decided at
runtime based on whether the calling context allows sleeping.
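
For illustration, the resulting allocation idiom, as a minimal sketch
of the hunk below:

  /* the request flags tell us whether the caller may sleep */
  gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
		? GFP_KERNEL : GFP_ATOMIC;

  assocmem = kmalloc(assoclen, flags);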

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 0f124d72e6b4..26b012065701 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -667,14 +667,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 		gcm_tfm = &aesni_gcm_tfm_sse;
 
 	/* Linearize assoc, if not already linear */
-	if (req->src->length >= assoclen && req->src->length &&
-		(!PageHighMem(sg_page(req->src)) ||
-			req->src->offset + req->src->length <= PAGE_SIZE)) {
+	if (req->src->length >= assoclen && req->src->length) {
 		scatterwalk_start(&assoc_sg_walk, req->src);
 		assoc = scatterwalk_map(&assoc_sg_walk);
 	} else {
+		gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+			      ? GFP_KERNEL : GFP_ATOMIC;
+
 		/* assoc can be any length, so must be on heap */
-		assocmem = kmalloc(assoclen, GFP_ATOMIC);
+		assocmem = kmalloc(assoclen, flags);
 		if (unlikely(!assocmem))
 			return -ENOMEM;
 		assoc = assocmem;
-- 
2.17.1



* [PATCH v2 4/5] crypto: x86/gcm-aes-ni - refactor scatterlist processing
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu

Currently, the gcm(aes-ni) driver open-codes the scatterlist handling
that is already encapsulated by the skcipher walk API. So let's switch
to that instead.

Also, move the handling at the end of gcmaes_crypt_by_sg() that is
dependent on whether we are encrypting or decrypting into the callers,
which always do one or the other.
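
For reference, a minimal sketch of the skcipher walk loop that this
converts to (process() is a hypothetical stand-in for the GCM update
helpers; error handling abbreviated):

  struct skcipher_walk walk;
  int err;

  err = skcipher_walk_aead_encrypt(&walk, req, false);
  while (walk.nbytes > 0) {
	/* the walk API maps src/dst into contiguous virtual chunks */
	process(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes);
	err = skcipher_walk_done(&walk, 0);
  }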

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 139 ++++++++------------
 1 file changed, 56 insertions(+), 83 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 26b012065701..d0b4fa7bd2d0 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -638,25 +638,18 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
 
 static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 			      unsigned int assoclen, u8 *hash_subkey,
-			      u8 *iv, void *aes_ctx)
+			      u8 *iv, void *aes_ctx, u8 *auth_tag,
+			      unsigned long auth_tag_len)
 {
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
 	const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
 	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
 	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
-	struct scatter_walk dst_sg_walk = {};
 	unsigned long left = req->cryptlen;
-	unsigned long len, srclen, dstlen;
 	struct scatter_walk assoc_sg_walk;
-	struct scatter_walk src_sg_walk;
-	struct scatterlist src_start[2];
-	struct scatterlist dst_start[2];
-	struct scatterlist *src_sg;
-	struct scatterlist *dst_sg;
-	u8 *src, *dst, *assoc;
+	struct skcipher_walk walk;
 	u8 *assocmem = NULL;
-	u8 authTag[16];
+	u8 *assoc;
+	int err;
 
 	if (!enc)
 		left -= auth_tag_len;
@@ -683,61 +676,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
 	}
 
-	if (left) {
-		src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
-		scatterwalk_start(&src_sg_walk, src_sg);
-		if (req->src != req->dst) {
-			dst_sg = scatterwalk_ffwd(dst_start, req->dst,
-						  req->assoclen);
-			scatterwalk_start(&dst_sg_walk, dst_sg);
-		}
-	}
-
 	kernel_fpu_begin();
 	gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
-	if (req->src != req->dst) {
-		while (left) {
-			src = scatterwalk_map(&src_sg_walk);
-			dst = scatterwalk_map(&dst_sg_walk);
-			srclen = scatterwalk_clamp(&src_sg_walk, left);
-			dstlen = scatterwalk_clamp(&dst_sg_walk, left);
-			len = min(srclen, dstlen);
-			if (len) {
-				if (enc)
-					gcm_tfm->enc_update(aes_ctx, data,
-							     dst, src, len);
-				else
-					gcm_tfm->dec_update(aes_ctx, data,
-							     dst, src, len);
-			}
-			left -= len;
-
-			scatterwalk_unmap(src);
-			scatterwalk_unmap(dst);
-			scatterwalk_advance(&src_sg_walk, len);
-			scatterwalk_advance(&dst_sg_walk, len);
-			scatterwalk_done(&src_sg_walk, 0, left);
-			scatterwalk_done(&dst_sg_walk, 1, left);
-		}
-	} else {
-		while (left) {
-			dst = src = scatterwalk_map(&src_sg_walk);
-			len = scatterwalk_clamp(&src_sg_walk, left);
-			if (len) {
-				if (enc)
-					gcm_tfm->enc_update(aes_ctx, data,
-							     src, src, len);
-				else
-					gcm_tfm->dec_update(aes_ctx, data,
-							     src, src, len);
-			}
-			left -= len;
-			scatterwalk_unmap(src);
-			scatterwalk_advance(&src_sg_walk, len);
-			scatterwalk_done(&src_sg_walk, 1, left);
-		}
-	}
-	gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len);
 	kernel_fpu_end();
 
 	if (!assocmem)
@@ -745,24 +685,25 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	else
 		kfree(assocmem);
 
-	if (!enc) {
-		u8 authTagMsg[16];
+	err = enc ? skcipher_walk_aead_encrypt(&walk, req, false)
+		  : skcipher_walk_aead_decrypt(&walk, req, false);
 
-		/* Copy out original authTag */
-		scatterwalk_map_and_copy(authTagMsg, req->src,
-					 req->assoclen + req->cryptlen -
-					 auth_tag_len,
-					 auth_tag_len, 0);
+	while (walk.nbytes > 0) {
+		kernel_fpu_begin();
+		(enc ? gcm_tfm->enc_update
+		     : gcm_tfm->dec_update)(aes_ctx, data, walk.dst.virt.addr,
+					    walk.src.virt.addr, walk.nbytes);
+		kernel_fpu_end();
 
-		/* Compare generated tag with passed in tag. */
-		return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
-			-EBADMSG : 0;
+		err = skcipher_walk_done(&walk, 0);
 	}
 
-	/* Copy in the authTag */
-	scatterwalk_map_and_copy(authTag, req->dst,
-				 req->assoclen + req->cryptlen,
-				 auth_tag_len, 1);
+	if (err)
+		return err;
+
+	kernel_fpu_begin();
+	gcm_tfm->finalize(aes_ctx, data, auth_tag, auth_tag_len);
+	kernel_fpu_end();
 
 	return 0;
 }
@@ -770,15 +711,47 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
-	return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
-				aes_ctx);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	u8 auth_tag[16];
+	int err;
+
+	err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx,
+				 auth_tag, auth_tag_len);
+	if (err)
+		return err;
+
+	scatterwalk_map_and_copy(auth_tag, req->dst,
+				 req->assoclen + req->cryptlen,
+				 auth_tag_len, 1);
+	return 0;
 }
 
 static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
-	return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
-				aes_ctx);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+	u8 auth_tag_msg[16];
+	u8 auth_tag[16];
+	int err;
+
+	err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx,
+				 auth_tag, auth_tag_len);
+	if (err)
+		return err;
+
+	/* Copy out original auth_tag */
+	scatterwalk_map_and_copy(auth_tag_msg, req->src,
+				 req->assoclen + req->cryptlen - auth_tag_len,
+				 auth_tag_len, 0);
+
+	/* Compare generated tag with passed in tag. */
+	if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
+		memzero_explicit(auth_tag, sizeof(auth_tag));
+		return -EBADMSG;
+	}
+	return 0;
 }
 
 static int helper_rfc4106_encrypt(struct aead_request *req)
-- 
2.17.1



* [PATCH v2 5/5] crypto: x86/gcm-aes-ni - replace function pointers with static branches
From: Ard Biesheuvel @ 2021-01-04 15:55 UTC
  To: linux-crypto; +Cc: Ard Biesheuvel, Megha Dey, Eric Biggers, Herbert Xu

Replace the function pointers in the GCM implementation with static
branches, which are based on code patching that occurs only at module
load time. This avoids the severe performance penalty caused by the use
of retpolines.

In order to retain the ability of cores that support AVX and AVX2 to
switch between different versions of the implementation based on the
input size, use static branches instead of static calls.
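
For reference, a minimal sketch of the static key pattern being applied
(key name as in the hunks below; do_avx_version()/do_sse_version() are
hypothetical stand-ins):

  static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);

  /* at module load time, patch the branch in if the CPU has AVX */
  if (boot_cpu_has(X86_FEATURE_AVX))
	static_branch_enable(&gcm_use_avx);

  /* hot path: a patched direct jump, no indirect call, no retpoline */
  if (static_branch_likely(&gcm_use_avx))
	do_avx_version();
  else
	do_sse_version();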

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 98 +++++++++++---------
 1 file changed, 54 insertions(+), 44 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index d0b4fa7bd2d0..fb17d4a2a5ca 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -31,6 +31,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 
@@ -128,24 +129,6 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-static const struct aesni_gcm_tfm_s {
-	void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
-		     u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
-	void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in, unsigned long plaintext_len);
-	void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
-			   const u8 *in, unsigned long ciphertext_len);
-	void (*finalize)(void *ctx, struct gcm_context_data *gdata,
-			 u8 *auth_tag, unsigned long auth_tag_len);
-} *aesni_gcm_tfm;
-
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
-	.init = &aesni_gcm_init,
-	.enc_update = &aesni_gcm_enc_update,
-	.dec_update = &aesni_gcm_dec_update,
-	.finalize = &aesni_gcm_finalize,
-};
-
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
 		void *keys, u8 *out, unsigned int num_bytes);
 asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
@@ -175,13 +158,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
-	.init = &aesni_gcm_init_avx_gen2,
-	.enc_update = &aesni_gcm_enc_update_avx_gen2,
-	.dec_update = &aesni_gcm_dec_update_avx_gen2,
-	.finalize = &aesni_gcm_finalize_avx_gen2,
-};
-
 /*
  * asmlinkage void aesni_gcm_init_avx_gen4()
  * gcm_data *my_ctx_data, context data
@@ -205,12 +181,8 @@ asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
 				   struct gcm_context_data *gdata,
 				   u8 *auth_tag, unsigned long auth_tag_len);
 
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
-	.init = &aesni_gcm_init_avx_gen4,
-	.enc_update = &aesni_gcm_enc_update_avx_gen4,
-	.dec_update = &aesni_gcm_dec_update_avx_gen4,
-	.finalize = &aesni_gcm_finalize_avx_gen4,
-};
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);
 
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -641,12 +613,12 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 			      u8 *iv, void *aes_ctx, u8 *auth_tag,
 			      unsigned long auth_tag_len)
 {
-	const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
 	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
 	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
 	unsigned long left = req->cryptlen;
 	struct scatter_walk assoc_sg_walk;
 	struct skcipher_walk walk;
+	bool do_avx, do_avx2;
 	u8 *assocmem = NULL;
 	u8 *assoc;
 	int err;
@@ -654,10 +626,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	if (!enc)
 		left -= auth_tag_len;
 
-	if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
-		gcm_tfm = &aesni_gcm_tfm_avx_gen2;
-	if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
-		gcm_tfm = &aesni_gcm_tfm_sse;
+	do_avx = (left >= AVX_GEN2_OPTSIZE);
+	do_avx2 = (left >= AVX_GEN4_OPTSIZE);
 
 	/* Linearize assoc, if not already linear */
 	if (req->src->length >= assoclen && req->src->length) {
@@ -677,7 +647,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	}
 
 	kernel_fpu_begin();
-	gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
+	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+		aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
+					assoclen);
+	else if (static_branch_likely(&gcm_use_avx) && do_avx)
+		aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
+					assoclen);
+	else
+		aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
 	kernel_fpu_end();
 
 	if (!assocmem)
@@ -690,9 +667,35 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 
 	while (walk.nbytes > 0) {
 		kernel_fpu_begin();
-		(enc ? gcm_tfm->enc_update
-		     : gcm_tfm->dec_update)(aes_ctx, data, walk.dst.virt.addr,
-					    walk.src.virt.addr, walk.nbytes);
+		if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
+			if (enc)
+				aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+			else
+				aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+		} else if (static_branch_likely(&gcm_use_avx) && do_avx) {
+			if (enc)
+				aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+			else
+				aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
+							      walk.dst.virt.addr,
+							      walk.src.virt.addr,
+							      walk.nbytes);
+		} else if (enc) {
+			aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.nbytes);
+		} else {
+			aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
+					     walk.src.virt.addr, walk.nbytes);
+		}
 		kernel_fpu_end();
 
 		err = skcipher_walk_done(&walk, 0);
@@ -702,7 +705,14 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 		return err;
 
 	kernel_fpu_begin();
-	gcm_tfm->finalize(aes_ctx, data, auth_tag, auth_tag_len);
+	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+		aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
+					    auth_tag_len);
+	else if (static_branch_likely(&gcm_use_avx) && do_avx)
+		aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
+					    auth_tag_len);
+	else
+		aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
 	kernel_fpu_end();
 
 	return 0;
@@ -1141,14 +1151,14 @@ static int __init aesni_init(void)
 #ifdef CONFIG_X86_64
 	if (boot_cpu_has(X86_FEATURE_AVX2)) {
 		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
+		static_branch_enable(&gcm_use_avx);
+		static_branch_enable(&gcm_use_avx2);
 	} else
 	if (boot_cpu_has(X86_FEATURE_AVX)) {
 		pr_info("AVX version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+		static_branch_enable(&gcm_use_avx);
 	} else {
 		pr_info("SSE version of gcm_enc/dec engaged.\n");
-		aesni_gcm_tfm = &aesni_gcm_tfm_sse;
 	}
 	aesni_ctr_enc_tfm = aesni_ctr_enc;
 	if (boot_cpu_has(X86_FEATURE_AVX)) {
-- 
2.17.1



* Re: [PATCH v2 0/5] crypto: gcm-aes-ni cleanups
From: Herbert Xu @ 2021-01-14  6:46 UTC
  To: Ard Biesheuvel; +Cc: linux-crypto, Megha Dey, Eric Biggers

On Mon, Jan 04, 2021 at 04:55:45PM +0100, Ard Biesheuvel wrote:
> Clean up some issues and peculiarities in the gcm(aes-ni) driver.
> 
> Changes since v1:
> - fix sleep-while-atomic issue reported by Eric
> - add a patch to get rid of indirect calls, to avoid taking the retpoline
>   performance hit
> 
> Cc: Megha Dey <megha.dey@intel.com>
> Cc: Eric Biggers <ebiggers@google.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> 
> Ard Biesheuvel (5):
>   crypto: x86/gcm-aes-ni - prevent misaligned buffers on the stack
>   crypto: x86/gcm-aes-ni - drop unused asm prototypes
>   crypto: x86/gcm-aes-ni - clean up mapping of associated data
>   crypto: x86/gcm-aes-ni - refactor scatterlist processing
>   crypto: x86/gcm-aes-ni - replace function pointers with static
>     branches
> 
>  arch/x86/crypto/aesni-intel_glue.c | 321 ++++++++------------
>  1 file changed, 121 insertions(+), 200 deletions(-)

All applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
