* [PATCH v2] crypto: aesni - add ccm(aes) algorithm implementation
@ 2020-12-01 19:45 Ard Biesheuvel
  2020-12-01 21:40 ` Ben Greear
  2020-12-01 21:57 ` Herbert Xu
  0 siblings, 2 replies; 38+ messages in thread
From: Ard Biesheuvel @ 2020-12-01 19:45 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, greearb, Ard Biesheuvel, Steve deRosier

Add a ccm(aes) implementation from the linux-wireless mailing list (see
http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).

This eliminates the FPU context save/restore overhead that the more general
ccm_base(ctr(aes-aesni),aes-aesni) combination incurs during the MAC
calculation.
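
For illustration only (not part of the patch): the generic template drives
the CBC-MAC through the aes-aesni single-block cipher, whose encrypt hook
brackets every block with its own FPU section, roughly as in the sketch
below (cbc_mac_per_block() is a made-up name, not kernel code). The driver
added here instead keeps the whole request inside a single
kernel_fpu_begin()/kernel_fpu_end() pair.

	/* hypothetical helper, illustrating the per-block FPU overhead */
	static void cbc_mac_per_block(struct crypto_aes_ctx *ctx, u8 mac[],
				      const u8 *data, unsigned int blocks)
	{
		while (blocks--) {
			crypto_xor(mac, data, AES_BLOCK_SIZE);
			kernel_fpu_begin();		/* save FPU/SIMD state */
			aesni_enc(ctx, mac, mac);	/* encrypt one block */
			kernel_fpu_end();		/* restore it again */
			data += AES_BLOCK_SIZE;
		}
	}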

Suggested-by: Ben Greear <greearb@candelatech.com>
Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
v2: avoid the SIMD helper, as it produces a CRYPTO_ALG_ASYNC aead, which
    is not usable by the 802.11 ccmp driver
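
    For context, a minimal sketch of how the 802.11 CCMP code allocates its
    AEAD, loosely based on net/mac80211/aes_ccm.c (the helper name
    ccmp_alloc_aead() is made up here): passing CRYPTO_ALG_ASYNC in the mask
    restricts the lookup to synchronous implementations, so an instance
    wrapped by the SIMD helper would not be selected.

	#include <linux/err.h>
	#include <crypto/aead.h>

	static struct crypto_aead *ccmp_alloc_aead(const u8 *key, size_t key_len,
						   size_t mic_len)
	{
		struct crypto_aead *tfm;
		int err;

		/* zero type plus an ASYNC mask excludes async implementations */
		tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm))
			return tfm;

		err = crypto_aead_setkey(tfm, key, key_len);
		if (!err)
			err = crypto_aead_setauthsize(tfm, mic_len);
		if (err) {
			crypto_free_aead(tfm);
			return ERR_PTR(err);
		}
		return tfm;
	}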

 arch/x86/crypto/aesni-intel_glue.c | 406 +++++++++++++++++++-
 1 file changed, 404 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ad8a7188a2bf..d90b03d9b420 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -97,12 +97,13 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
 #define AVX_GEN2_OPTSIZE 640
 #define AVX_GEN4_OPTSIZE 4096
 
+asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
+
 #ifdef CONFIG_X86_64
 
 static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
-asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
-			      const u8 *in, unsigned int len, u8 *iv);
 
 asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
 				 const u8 *in, bool enc, le128 *iv);
@@ -454,6 +455,377 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int aesni_ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return aes_set_key_common(crypto_aead_tfm(tfm), ctx, in_key, key_len);
+}
+
+static int aesni_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	if ((authsize & 1) || authsize < 4)
+		return -EINVAL;
+	return 0;
+}
+
+static int ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+	__be32 data;
+
+	memset(block, 0, csize);
+	block += csize;
+
+	if (csize >= 4)
+		csize = 4;
+	else if (msglen > (1 << (8 * csize)))
+		return -EOVERFLOW;
+
+	data = cpu_to_be32(msglen);
+	memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+	return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+	u32 l = req->iv[0] + 1;
+
+	/* verify that CCM dimension 'L' is set correctly in the IV */
+	if (l < 2 || l > 8)
+		return -EINVAL;
+
+	/* verify that msglen can in fact be represented in L bytes */
+	if (l < 4 && msglen >> (8 * l))
+		return -EOVERFLOW;
+
+	/*
+	 * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+	 * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+	 */
+	n[0] = 0;
+	n[1] = cpu_to_be32(msglen);
+
+	memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+	/*
+	 * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+	 * - bits 0..2	: max # of bytes required to represent msglen, minus 1
+	 *                (already set by caller)
+	 * - bits 3..5	: size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+	 * - bit 6	: indicates presence of authenticate-only data
+	 */
+	maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+	if (req->assoclen)
+		maciv[0] |= 0x40;
+
+	memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+	return ccm_set_msg_len(maciv + AES_BLOCK_SIZE - l, msglen, l);
+}
+
+static int compute_mac(struct crypto_aes_ctx *ctx, u8 mac[], u8 *data, int n,
+		       unsigned int ilen, u8 *idata, bool do_simd)
+{
+	unsigned int bs = AES_BLOCK_SIZE;
+	u8 *odata = mac;
+	int datalen, getlen;
+
+	datalen = n;
+
+	/* first time in here, block may be partially filled. */
+	getlen = bs - ilen;
+	if (datalen >= getlen) {
+		memcpy(idata + ilen, data, getlen);
+
+		if (likely(do_simd)) {
+			aesni_cbc_enc(ctx, odata, idata, AES_BLOCK_SIZE, odata);
+		} else {
+			crypto_xor(odata, idata, AES_BLOCK_SIZE);
+			aes_encrypt(ctx, odata, odata);
+		}
+
+		datalen -= getlen;
+		data += getlen;
+		ilen = 0;
+	}
+
+	/* now encrypt rest of data */
+	while (datalen >= bs) {
+		if (likely(do_simd)) {
+			aesni_cbc_enc(ctx, odata, data, AES_BLOCK_SIZE, odata);
+		} else {
+			crypto_xor(odata, data, AES_BLOCK_SIZE);
+			aes_encrypt(ctx, odata, odata);
+		}
+
+		datalen -= bs;
+		data += bs;
+	}
+
+	/* check and see if there's leftover data that wasn't
+	 * enough to fill a block.
+	 */
+	if (datalen) {
+		memcpy(idata + ilen, data, datalen);
+		ilen += datalen;
+	}
+	return ilen;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req,
+				   struct crypto_aes_ctx *ctx, u8 mac[],
+				   struct scatterlist *src,
+				   bool do_simd)
+{
+	unsigned int len = req->assoclen;
+	struct scatter_walk walk;
+	u8 idata[AES_BLOCK_SIZE];
+	unsigned int ilen;
+	struct {
+		__be16 l;
+		__be32 h;
+	} __packed *ltag = (void *)idata;
+
+	/* prepend the AAD with a length tag */
+	if (len < 0xff00) {
+		ltag->l = cpu_to_be16(len);
+		ilen = 2;
+	} else  {
+		ltag->l = cpu_to_be16(0xfffe);
+		ltag->h = cpu_to_be32(len);
+		ilen = 6;
+	}
+
+	scatterwalk_start(&walk, src);
+
+	while (len) {
+		u8 *src;
+		int n;
+
+		n = scatterwalk_clamp(&walk, len);
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		src = scatterwalk_map(&walk);
+
+		ilen = compute_mac(ctx, mac, src, n, ilen, idata, do_simd);
+		len -= n;
+
+		scatterwalk_unmap(src);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	}
+
+	/* any leftover data needs to be padded and then encrypted */
+	if (ilen) {
+		crypto_xor(mac, idata, ilen);
+		if (likely(do_simd))
+			aesni_enc(ctx, mac, mac);
+		else
+			aes_encrypt(ctx, mac, mac);
+	}
+}
+
+static int aesni_ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	bool const do_simd = crypto_simd_usable();
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 __aligned(8) buf[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen);
+	if (err)
+		return err;
+
+	if (likely(do_simd)) {
+		kernel_fpu_begin();
+		aesni_enc(ctx, mac, mac);
+	} else {
+		aes_encrypt(ctx, mac, mac);
+	}
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src, do_simd);
+
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		for (n = 0; n < len; n += AES_BLOCK_SIZE) {
+			if (likely(do_simd)) {
+				aesni_cbc_enc(ctx, mac, walk.src.virt.addr + n,
+					      AES_BLOCK_SIZE, mac);
+			} else {
+				crypto_xor(mac, walk.src.virt.addr + n,
+					   AES_BLOCK_SIZE);
+				aes_encrypt(ctx, mac, mac);
+
+				aes_encrypt(ctx, buf, walk.iv);
+				crypto_inc(walk.iv, AES_BLOCK_SIZE);
+				crypto_xor_cpy(walk.dst.virt.addr + n,
+					       walk.src.virt.addr + n,
+					       buf, AES_BLOCK_SIZE);
+			}
+		}
+		if (likely(do_simd))
+			aesni_ctr_enc(ctx, walk.dst.virt.addr,
+				      walk.src.virt.addr, len, walk.iv);
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		if (likely(do_simd))
+			aesni_enc(ctx, buf, walk.iv);
+		else
+			aes_encrypt(ctx, buf, walk.iv);
+
+		crypto_xor(mac, walk.src.virt.addr, walk.nbytes);
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+			       buf, walk.nbytes);
+
+		if (likely(do_simd))
+			aesni_enc(ctx, mac, mac);
+		else
+			aes_encrypt(ctx, mac, mac);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
+
+	if (likely(do_simd)) {
+		aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
+	} else {
+		aes_encrypt(ctx, buf, walk.iv);
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+fail:
+	if (likely(do_simd))
+		kernel_fpu_end();
+	return err;
+}
+
+static int aesni_ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	unsigned int authsize = crypto_aead_authsize(aead);
+	bool const do_simd = crypto_simd_usable();
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 __aligned(8) tag[AES_BLOCK_SIZE];
+	u8 __aligned(8) buf[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen - authsize);
+	if (err)
+		return err;
+
+	/* copy authtag from end of src */
+	scatterwalk_map_and_copy(tag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (likely(do_simd)) {
+		kernel_fpu_begin();
+		aesni_enc(ctx, mac, mac);
+	} else {
+		aes_encrypt(ctx, mac, mac);
+	}
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src, do_simd);
+
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		if (likely(do_simd))
+			aesni_ctr_enc(ctx, walk.dst.virt.addr,
+				      walk.src.virt.addr, len, walk.iv);
+
+		for (n = 0; n < len; n += AES_BLOCK_SIZE) {
+			if (likely(do_simd)) {
+				aesni_cbc_enc(ctx, mac, walk.dst.virt.addr + n,
+					      AES_BLOCK_SIZE, mac);
+			} else {
+				aes_encrypt(ctx, buf, walk.iv);
+				crypto_inc(walk.iv, AES_BLOCK_SIZE);
+				crypto_xor_cpy(walk.dst.virt.addr + n,
+					       walk.src.virt.addr + n,
+					       buf, AES_BLOCK_SIZE);
+
+				crypto_xor(mac, walk.dst.virt.addr + n,
+					   AES_BLOCK_SIZE);
+				aes_encrypt(ctx, mac, mac);
+			}
+		}
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		if (likely(do_simd))
+			aesni_enc(ctx, buf, walk.iv);
+		else
+			aes_encrypt(ctx, buf, walk.iv);
+
+		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+			       buf, walk.nbytes);
+		crypto_xor(mac, walk.dst.virt.addr, walk.nbytes);
+
+		if (likely(do_simd))
+			aesni_enc(ctx, mac, mac);
+		else
+			aes_encrypt(ctx, mac, mac);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
+
+	if (likely(do_simd)) {
+		aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
+	} else {
+		aes_encrypt(ctx, buf, walk.iv);
+		crypto_xor(mac, buf, AES_BLOCK_SIZE);
+	}
+
+	/* compare calculated auth tag with the stored one */
+	if (crypto_memneq(mac, tag, authsize))
+		err = -EBADMSG;
+
+fail:
+	if (likely(do_simd))
+		kernel_fpu_end();
+	return err;
+}
+
 #ifdef CONFIG_X86_64
 static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 			    struct skcipher_walk *walk)
@@ -1045,10 +1417,28 @@ static struct aead_alg aesni_aeads[] = { {
 		.cra_module		= THIS_MODULE,
 	},
 } };
+
 #else
 static struct aead_alg aesni_aeads[0];
 #endif
 
+static struct aead_alg aesni_ccm_aead = {
+	.base.cra_name		= "ccm(aes)",
+	.base.cra_driver_name	= "ccm-aesni",
+	.base.cra_priority	= 400,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.setkey			= aesni_ccm_setkey,
+	.setauthsize		= aesni_ccm_setauthsize,
+	.encrypt		= aesni_ccm_encrypt,
+	.decrypt		= aesni_ccm_decrypt,
+	.ivsize			= AES_BLOCK_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.maxauthsize		= AES_BLOCK_SIZE,
+};
+
 static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
 
 static const struct x86_cpu_id aesni_cpu_id[] = {
@@ -1098,8 +1488,17 @@ static int __init aesni_init(void)
 	if (err)
 		goto unregister_skciphers;
 
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		err = crypto_register_aead(&aesni_ccm_aead);
+		if (err)
+			goto unregister_aeads;
+	}
+
 	return 0;
 
+unregister_aeads:
+	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
+			      aesni_simd_aeads);
 unregister_skciphers:
 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
 				  aesni_simd_skciphers);
@@ -1110,6 +1509,9 @@ static int __init aesni_init(void)
 
 static void __exit aesni_exit(void)
 {
+	if (IS_ENABLED(CONFIG_X86_64))
+		crypto_unregister_aead(&aesni_ccm_aead);
+
 	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
 			      aesni_simd_aeads);
 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
-- 
2.17.1

