All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] crypto: aesni - add ccm(aes) algorithm implementation
@ 2020-11-29 18:20 Ard Biesheuvel
  2020-11-29 18:21 ` Ard Biesheuvel
  2020-11-30 22:47 ` Ben Greear
  0 siblings, 2 replies; 5+ messages in thread
From: Ard Biesheuvel @ 2020-11-29 18:20 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, greearb, Steve deRosier, Steve deRosier

From: Steve deRosier <ardb@kernel.org>

Add ccm(aes) implementation from linux-wireless mailing list (see
http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).

This eliminates FPU context store/restore overhead existing in more
general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.

Suggested-by: Ben Greear <greearb@candelatech.com>
Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
Ben,

This is almost a rewrite of the original patch, switching to the new
skcipher API, using the existing SIMD helper, and drop numerous unrelated
changes. The basic approach is almost identical, though, so I expect this
to perform on par or perhaps slightly faster than the original.

Could you please confirm with some numbers?

Thanks,
Ard.


 arch/x86/crypto/aesni-intel_glue.c | 310 ++++++++++++++++++++
 1 file changed, 310 insertions(+)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ad8a7188a2bf..f59f3c8772a6 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -513,6 +513,298 @@ static int ctr_crypt(struct skcipher_request *req)
 	return err;
 }
 
+static int aesni_ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
+			    unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return aes_set_key_common(crypto_aead_tfm(tfm), ctx, in_key, key_len);
+}
+
+static int aesni_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	if ((authsize & 1) || authsize < 4)
+		return -EINVAL;
+	return 0;
+}
+
+static int ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+	__be32 data;
+
+	memset(block, 0, csize);
+	block += csize;
+
+	if (csize >= 4)
+		csize = 4;
+	else if (msglen > (1 << (8 * csize)))
+		return -EOVERFLOW;
+
+	data = cpu_to_be32(msglen);
+	memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+	return 0;
+}
+
+static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	__be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
+	u32 l = req->iv[0] + 1;
+
+	/* verify that CCM dimension 'L' is set correctly in the IV */
+	if (l < 2 || l > 8)
+		return -EINVAL;
+
+	/* verify that msglen can in fact be represented in L bytes */
+	if (l < 4 && msglen >> (8 * l))
+		return -EOVERFLOW;
+
+	/*
+	 * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
+	 * uses a u32 type to represent msglen so the top 4 bytes are always 0.
+	 */
+	n[0] = 0;
+	n[1] = cpu_to_be32(msglen);
+
+	memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
+
+	/*
+	 * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
+	 * - bits 0..2	: max # of bytes required to represent msglen, minus 1
+	 *                (already set by caller)
+	 * - bits 3..5	: size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
+	 * - bit 6	: indicates presence of authenticate-only data
+	 */
+	maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
+	if (req->assoclen)
+		maciv[0] |= 0x40;
+
+	memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
+	return ccm_set_msg_len(maciv + AES_BLOCK_SIZE - l, msglen, l);
+}
+
+static int compute_mac(struct crypto_aes_ctx *ctx, u8 mac[], u8 *data, int n,
+		       unsigned int ilen, u8 *idata)
+{
+	unsigned int bs = AES_BLOCK_SIZE;
+	u8 *odata = mac;
+	int datalen, getlen;
+
+	datalen = n;
+
+	/* first time in here, block may be partially filled. */
+	getlen = bs - ilen;
+	if (datalen >= getlen) {
+		memcpy(idata + ilen, data, getlen);
+
+		aesni_cbc_enc(ctx, odata, idata, AES_BLOCK_SIZE, odata);
+
+		datalen -= getlen;
+		data += getlen;
+		ilen = 0;
+	}
+
+	/* now encrypt rest of data */
+	while (datalen >= bs) {
+		aesni_cbc_enc(ctx, odata, data, AES_BLOCK_SIZE, odata);
+
+		datalen -= bs;
+		data += bs;
+	}
+
+	/* check and see if there's leftover data that wasn't
+	 * enough to fill a block.
+	 */
+	if (datalen) {
+		memcpy(idata + ilen, data, datalen);
+		ilen += datalen;
+	}
+	return ilen;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req,
+				   struct crypto_aes_ctx *ctx, u8 mac[],
+				   struct scatterlist *src)
+{
+	unsigned int len = req->assoclen;
+	struct scatter_walk walk;
+	u8 idata[AES_BLOCK_SIZE];
+	unsigned int ilen;
+	struct {
+		__be16 l;
+		__be32 h;
+	} __packed *ltag = (void *)idata;
+
+	/* prepend the AAD with a length tag */
+	if (len < 0xff00) {
+		ltag->l = cpu_to_be16(len);
+		ilen = 2;
+	} else  {
+		ltag->l = cpu_to_be16(0xfffe);
+		ltag->h = cpu_to_be32(len);
+		ilen = 6;
+	}
+
+	scatterwalk_start(&walk, src);
+
+	while (len) {
+		u8 *src;
+		int n;
+
+		n = scatterwalk_clamp(&walk, len);
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, len);
+		}
+		src = scatterwalk_map(&walk);
+
+		ilen = compute_mac(ctx, mac, src, n, ilen, idata);
+		len -= n;
+
+		scatterwalk_unmap(src);
+		scatterwalk_advance(&walk, n);
+		scatterwalk_done(&walk, 0, len);
+	}
+
+	/* any leftover needs padding and then encrypted */
+	if (ilen) {
+		crypto_xor(mac, idata, ilen);
+		aesni_enc(ctx, mac, mac);
+	}
+}
+
+static int aesni_ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen);
+	if (err)
+		return err;
+
+	kernel_fpu_begin();
+
+	aesni_enc(ctx, mac, mac);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src);
+
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_encrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		for (n = 0; n < len; n += AES_BLOCK_SIZE)
+			aesni_cbc_enc(ctx, mac, walk.src.virt.addr + n,
+				      AES_BLOCK_SIZE, mac);
+
+		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
+			      walk.iv);
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
+
+		memcpy(buf, walk.src.virt.addr, walk.nbytes);
+		aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
+
+		ctr_crypt_final(ctx, &walk);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
+	aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+fail:
+	kernel_fpu_end();
+	return err;
+}
+
+static int aesni_ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
+	unsigned int authsize = crypto_aead_authsize(aead);
+	u8 __aligned(8) mac[AES_BLOCK_SIZE];
+	u8 __aligned(8) tag[AES_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	u32 l = req->iv[0] + 1;
+	int err;
+
+	err = ccm_init_mac(req, mac, req->cryptlen - authsize);
+	if (err)
+		return err;
+
+	/* copy authtag from end of src */
+	scatterwalk_map_and_copy(tag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	kernel_fpu_begin();
+
+	aesni_enc(ctx, mac, mac);
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, ctx, mac, req->src);
+
+	req->iv[AES_BLOCK_SIZE - 1] = 0x1;
+	err = skcipher_walk_aead_decrypt(&walk, req, true);
+
+	while (walk.nbytes >= AES_BLOCK_SIZE) {
+		int len = walk.nbytes & AES_BLOCK_MASK;
+		int n;
+
+		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
+			      walk.iv);
+
+		for (n = 0; n < len; n += AES_BLOCK_SIZE)
+			aesni_cbc_enc(ctx, mac, walk.dst.virt.addr + n,
+				      AES_BLOCK_SIZE, mac);
+
+		err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
+	}
+	if (walk.nbytes) {
+		u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
+
+		ctr_crypt_final(ctx, &walk);
+
+		memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+		aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
+
+		err = skcipher_walk_done(&walk, 0);
+	}
+
+	if (err)
+		goto fail;
+
+	memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
+	aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
+
+	/* compare calculated auth tag with the stored one */
+	if (crypto_memneq(mac, tag, authsize))
+		err = -EBADMSG;
+
+fail:
+	kernel_fpu_end();
+	return err;
+}
+
 static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
@@ -1044,6 +1336,24 @@ static struct aead_alg aesni_aeads[] = { {
 		.cra_alignmask		= AESNI_ALIGN - 1,
 		.cra_module		= THIS_MODULE,
 	},
+}, {
+	.setkey		= aesni_ccm_setkey,
+	.setauthsize	= aesni_ccm_setauthsize,
+	.encrypt	= aesni_ccm_encrypt,
+	.decrypt	= aesni_ccm_decrypt,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.maxauthsize	= AES_BLOCK_SIZE,
+	.base = {
+		.cra_name		= "__ccm(aes)",
+		.cra_driver_name	= "__ccm-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
 } };
 #else
 static struct aead_alg aesni_aeads[0];
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation
  2020-11-29 18:20 [PATCH] crypto: aesni - add ccm(aes) algorithm implementation Ard Biesheuvel
@ 2020-11-29 18:21 ` Ard Biesheuvel
  2020-11-30 22:47 ` Ben Greear
  1 sibling, 0 replies; 5+ messages in thread
From: Ard Biesheuvel @ 2020-11-29 18:21 UTC (permalink / raw)
  To: Linux Crypto Mailing List; +Cc: Herbert Xu, Ben Greear, Steve deRosier

On Sun, 29 Nov 2020 at 19:20, Ard Biesheuvel <ardb@kernel.org> wrote:
>
> From: Steve deRosier <ardb@kernel.org>
>

Whoops - please ignore this line.

> Add ccm(aes) implementation from linux-wireless mailing list (see
> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
>
> This eliminates FPU context store/restore overhead existing in more
> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
>
> Suggested-by: Ben Greear <greearb@candelatech.com>
> Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
> Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> Ben,
>
> This is almost a rewrite of the original patch, switching to the new
> skcipher API, using the existing SIMD helper, and drop numerous unrelated
> changes. The basic approach is almost identical, though, so I expect this
> to perform on par or perhaps slightly faster than the original.
>
> Could you please confirm with some numbers?
>
> Thanks,
> Ard.
>
>
>  arch/x86/crypto/aesni-intel_glue.c | 310 ++++++++++++++++++++
>  1 file changed, 310 insertions(+)
>
> diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
> index ad8a7188a2bf..f59f3c8772a6 100644
> --- a/arch/x86/crypto/aesni-intel_glue.c
> +++ b/arch/x86/crypto/aesni-intel_glue.c
> @@ -513,6 +513,298 @@ static int ctr_crypt(struct skcipher_request *req)
>         return err;
>  }
>
> +static int aesni_ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
> +                           unsigned int key_len)
> +{
> +       struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
> +
> +       return aes_set_key_common(crypto_aead_tfm(tfm), ctx, in_key, key_len);
> +}
> +
> +static int aesni_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
> +{
> +       if ((authsize & 1) || authsize < 4)
> +               return -EINVAL;
> +       return 0;
> +}
> +
> +static int ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
> +{
> +       __be32 data;
> +
> +       memset(block, 0, csize);
> +       block += csize;
> +
> +       if (csize >= 4)
> +               csize = 4;
> +       else if (msglen > (1 << (8 * csize)))
> +               return -EOVERFLOW;
> +
> +       data = cpu_to_be32(msglen);
> +       memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
> +
> +       return 0;
> +}
> +
> +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
> +{
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8];
> +       u32 l = req->iv[0] + 1;
> +
> +       /* verify that CCM dimension 'L' is set correctly in the IV */
> +       if (l < 2 || l > 8)
> +               return -EINVAL;
> +
> +       /* verify that msglen can in fact be represented in L bytes */
> +       if (l < 4 && msglen >> (8 * l))
> +               return -EOVERFLOW;
> +
> +       /*
> +        * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi
> +        * uses a u32 type to represent msglen so the top 4 bytes are always 0.
> +        */
> +       n[0] = 0;
> +       n[1] = cpu_to_be32(msglen);
> +
> +       memcpy(maciv, req->iv, AES_BLOCK_SIZE - l);
> +
> +       /*
> +        * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C)
> +        * - bits 0..2  : max # of bytes required to represent msglen, minus 1
> +        *                (already set by caller)
> +        * - bits 3..5  : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc)
> +        * - bit 6      : indicates presence of authenticate-only data
> +        */
> +       maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2;
> +       if (req->assoclen)
> +               maciv[0] |= 0x40;
> +
> +       memset(&req->iv[AES_BLOCK_SIZE - l], 0, l);
> +       return ccm_set_msg_len(maciv + AES_BLOCK_SIZE - l, msglen, l);
> +}
> +
> +static int compute_mac(struct crypto_aes_ctx *ctx, u8 mac[], u8 *data, int n,
> +                      unsigned int ilen, u8 *idata)
> +{
> +       unsigned int bs = AES_BLOCK_SIZE;
> +       u8 *odata = mac;
> +       int datalen, getlen;
> +
> +       datalen = n;
> +
> +       /* first time in here, block may be partially filled. */
> +       getlen = bs - ilen;
> +       if (datalen >= getlen) {
> +               memcpy(idata + ilen, data, getlen);
> +
> +               aesni_cbc_enc(ctx, odata, idata, AES_BLOCK_SIZE, odata);
> +
> +               datalen -= getlen;
> +               data += getlen;
> +               ilen = 0;
> +       }
> +
> +       /* now encrypt rest of data */
> +       while (datalen >= bs) {
> +               aesni_cbc_enc(ctx, odata, data, AES_BLOCK_SIZE, odata);
> +
> +               datalen -= bs;
> +               data += bs;
> +       }
> +
> +       /* check and see if there's leftover data that wasn't
> +        * enough to fill a block.
> +        */
> +       if (datalen) {
> +               memcpy(idata + ilen, data, datalen);
> +               ilen += datalen;
> +       }
> +       return ilen;
> +}
> +
> +static void ccm_calculate_auth_mac(struct aead_request *req,
> +                                  struct crypto_aes_ctx *ctx, u8 mac[],
> +                                  struct scatterlist *src)
> +{
> +       unsigned int len = req->assoclen;
> +       struct scatter_walk walk;
> +       u8 idata[AES_BLOCK_SIZE];
> +       unsigned int ilen;
> +       struct {
> +               __be16 l;
> +               __be32 h;
> +       } __packed *ltag = (void *)idata;
> +
> +       /* prepend the AAD with a length tag */
> +       if (len < 0xff00) {
> +               ltag->l = cpu_to_be16(len);
> +               ilen = 2;
> +       } else  {
> +               ltag->l = cpu_to_be16(0xfffe);
> +               ltag->h = cpu_to_be32(len);
> +               ilen = 6;
> +       }
> +
> +       scatterwalk_start(&walk, src);
> +
> +       while (len) {
> +               u8 *src;
> +               int n;
> +
> +               n = scatterwalk_clamp(&walk, len);
> +               if (!n) {
> +                       scatterwalk_start(&walk, sg_next(walk.sg));
> +                       n = scatterwalk_clamp(&walk, len);
> +               }
> +               src = scatterwalk_map(&walk);
> +
> +               ilen = compute_mac(ctx, mac, src, n, ilen, idata);
> +               len -= n;
> +
> +               scatterwalk_unmap(src);
> +               scatterwalk_advance(&walk, n);
> +               scatterwalk_done(&walk, 0, len);
> +       }
> +
> +       /* any leftover needs padding and then encrypted */
> +       if (ilen) {
> +               crypto_xor(mac, idata, ilen);
> +               aesni_enc(ctx, mac, mac);
> +       }
> +}
> +
> +static int aesni_ccm_encrypt(struct aead_request *req)
> +{
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
> +       u8 __aligned(8) mac[AES_BLOCK_SIZE];
> +       struct skcipher_walk walk;
> +       u32 l = req->iv[0] + 1;
> +       int err;
> +
> +       err = ccm_init_mac(req, mac, req->cryptlen);
> +       if (err)
> +               return err;
> +
> +       kernel_fpu_begin();
> +
> +       aesni_enc(ctx, mac, mac);
> +
> +       if (req->assoclen)
> +               ccm_calculate_auth_mac(req, ctx, mac, req->src);
> +
> +       req->iv[AES_BLOCK_SIZE - 1] = 0x1;
> +       err = skcipher_walk_aead_encrypt(&walk, req, true);
> +
> +       while (walk.nbytes >= AES_BLOCK_SIZE) {
> +               int len = walk.nbytes & AES_BLOCK_MASK;
> +               int n;
> +
> +               for (n = 0; n < len; n += AES_BLOCK_SIZE)
> +                       aesni_cbc_enc(ctx, mac, walk.src.virt.addr + n,
> +                                     AES_BLOCK_SIZE, mac);
> +
> +               aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
> +                             walk.iv);
> +
> +               err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
> +       }
> +       if (walk.nbytes) {
> +               u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
> +
> +               memcpy(buf, walk.src.virt.addr, walk.nbytes);
> +               aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
> +
> +               ctr_crypt_final(ctx, &walk);
> +
> +               err = skcipher_walk_done(&walk, 0);
> +       }
> +
> +       if (err)
> +               goto fail;
> +
> +       memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
> +       aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
> +
> +       /* copy authtag to end of dst */
> +       scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
> +                                crypto_aead_authsize(aead), 1);
> +
> +fail:
> +       kernel_fpu_end();
> +       return err;
> +}
> +
> +static int aesni_ccm_decrypt(struct aead_request *req)
> +{
> +       struct crypto_aead *aead = crypto_aead_reqtfm(req);
> +       struct crypto_aes_ctx *ctx = aes_ctx(crypto_aead_ctx(aead));
> +       unsigned int authsize = crypto_aead_authsize(aead);
> +       u8 __aligned(8) mac[AES_BLOCK_SIZE];
> +       u8 __aligned(8) tag[AES_BLOCK_SIZE];
> +       struct skcipher_walk walk;
> +       u32 l = req->iv[0] + 1;
> +       int err;
> +
> +       err = ccm_init_mac(req, mac, req->cryptlen - authsize);
> +       if (err)
> +               return err;
> +
> +       /* copy authtag from end of src */
> +       scatterwalk_map_and_copy(tag, req->src,
> +                                req->assoclen + req->cryptlen - authsize,
> +                                authsize, 0);
> +
> +       kernel_fpu_begin();
> +
> +       aesni_enc(ctx, mac, mac);
> +
> +       if (req->assoclen)
> +               ccm_calculate_auth_mac(req, ctx, mac, req->src);
> +
> +       req->iv[AES_BLOCK_SIZE - 1] = 0x1;
> +       err = skcipher_walk_aead_decrypt(&walk, req, true);
> +
> +       while (walk.nbytes >= AES_BLOCK_SIZE) {
> +               int len = walk.nbytes & AES_BLOCK_MASK;
> +               int n;
> +
> +               aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, len,
> +                             walk.iv);
> +
> +               for (n = 0; n < len; n += AES_BLOCK_SIZE)
> +                       aesni_cbc_enc(ctx, mac, walk.dst.virt.addr + n,
> +                                     AES_BLOCK_SIZE, mac);
> +
> +               err = skcipher_walk_done(&walk, walk.nbytes & ~AES_BLOCK_MASK);
> +       }
> +       if (walk.nbytes) {
> +               u8 __aligned(8) buf[AES_BLOCK_SIZE] = {};
> +
> +               ctr_crypt_final(ctx, &walk);
> +
> +               memcpy(buf, walk.dst.virt.addr, walk.nbytes);
> +               aesni_cbc_enc(ctx, mac, buf, AES_BLOCK_SIZE, mac);
> +
> +               err = skcipher_walk_done(&walk, 0);
> +       }
> +
> +       if (err)
> +               goto fail;
> +
> +       memset(walk.iv + AES_BLOCK_SIZE - l, 0, l);
> +       aesni_ctr_enc(ctx, mac, mac, AES_BLOCK_SIZE, walk.iv);
> +
> +       /* compare calculated auth tag with the stored one */
> +       if (crypto_memneq(mac, tag, authsize))
> +               err = -EBADMSG;
> +
> +fail:
> +       kernel_fpu_end();
> +       return err;
> +}
> +
>  static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
>                             unsigned int keylen)
>  {
> @@ -1044,6 +1336,24 @@ static struct aead_alg aesni_aeads[] = { {
>                 .cra_alignmask          = AESNI_ALIGN - 1,
>                 .cra_module             = THIS_MODULE,
>         },
> +}, {
> +       .setkey         = aesni_ccm_setkey,
> +       .setauthsize    = aesni_ccm_setauthsize,
> +       .encrypt        = aesni_ccm_encrypt,
> +       .decrypt        = aesni_ccm_decrypt,
> +       .ivsize         = AES_BLOCK_SIZE,
> +       .chunksize      = AES_BLOCK_SIZE,
> +       .maxauthsize    = AES_BLOCK_SIZE,
> +       .base = {
> +               .cra_name               = "__ccm(aes)",
> +               .cra_driver_name        = "__ccm-aesni",
> +               .cra_priority           = 400,
> +               .cra_flags              = CRYPTO_ALG_INTERNAL,
> +               .cra_blocksize          = 1,
> +               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
> +               .cra_alignmask          = AESNI_ALIGN - 1,
> +               .cra_module             = THIS_MODULE,
> +       },
>  } };
>  #else
>  static struct aead_alg aesni_aeads[0];
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation
  2020-11-29 18:20 [PATCH] crypto: aesni - add ccm(aes) algorithm implementation Ard Biesheuvel
  2020-11-29 18:21 ` Ard Biesheuvel
@ 2020-11-30 22:47 ` Ben Greear
  2020-12-01  6:32   ` Ard Biesheuvel
  1 sibling, 1 reply; 5+ messages in thread
From: Ben Greear @ 2020-11-30 22:47 UTC (permalink / raw)
  To: Ard Biesheuvel, linux-crypto; +Cc: herbert, Steve deRosier

On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
> From: Steve deRosier <ardb@kernel.org>
> 
> Add ccm(aes) implementation from linux-wireless mailing list (see
> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
> 
> This eliminates FPU context store/restore overhead existing in more
> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
> 
> Suggested-by: Ben Greear <greearb@candelatech.com>
> Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
> Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> Ben,
> 
> This is almost a rewrite of the original patch, switching to the new
> skcipher API, using the existing SIMD helper, and drop numerous unrelated
> changes. The basic approach is almost identical, though, so I expect this
> to perform on par or perhaps slightly faster than the original.
> 
> Could you please confirm with some numbers?

I tried this on my apu2 platform, here is perf top during a TCP download using
rx-sw-crypt (ie, the aesni cpu decrypt path):

   18.77%  [kernel]                            [k] acpi_idle_enter
   14.68%  [kernel]                            [k] kernel_fpu_begin
    4.45%  [kernel]                            [k] __crypto_xor
    3.46%  [kernel]                            [k] _aesni_enc1

Total throughput is 127Mbps or so.  This is with your patch applied to 5.8.0+
kernel (it applied clean with 'git am')

Is there a good way to verify at runtime that I've properly applied your patch?

On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
and perf-top shows:

   13.33%  [kernel]                            [k] acpi_idle_do_entry
    9.21%  [kernel]                            [k] _aesni_enc1
    4.49%  [unknown]                           [.] 0x00007fbc3f00adb6
    4.34%  [unknown]                           [.] 0x00007fbc3f00adba
    3.85%  [kernel]                            [k] memcpy


So, new patch is not working that well for me...

Thanks,
Ben


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation
  2020-11-30 22:47 ` Ben Greear
@ 2020-12-01  6:32   ` Ard Biesheuvel
  2020-12-01 14:26     ` Ben Greear
  0 siblings, 1 reply; 5+ messages in thread
From: Ard Biesheuvel @ 2020-12-01  6:32 UTC (permalink / raw)
  To: Ben Greear; +Cc: Linux Crypto Mailing List, Herbert Xu, Steve deRosier

On Mon, 30 Nov 2020 at 23:48, Ben Greear <greearb@candelatech.com> wrote:
>
> On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
> > From: Steve deRosier <ardb@kernel.org>
> >
> > Add ccm(aes) implementation from linux-wireless mailing list (see
> > http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
> >
> > This eliminates FPU context store/restore overhead existing in more
> > general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
> >
> > Suggested-by: Ben Greear <greearb@candelatech.com>
> > Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
> > Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> > Ben,
> >
> > This is almost a rewrite of the original patch, switching to the new
> > skcipher API, using the existing SIMD helper, and drop numerous unrelated
> > changes. The basic approach is almost identical, though, so I expect this
> > to perform on par or perhaps slightly faster than the original.
> >
> > Could you please confirm with some numbers?
>
> I tried this on my apu2 platform, here is perf top during a TCP download using
> rx-sw-crypt (ie, the aesni cpu decrypt path):
>
>    18.77%  [kernel]                            [k] acpi_idle_enter
>    14.68%  [kernel]                            [k] kernel_fpu_begin
>     4.45%  [kernel]                            [k] __crypto_xor
>     3.46%  [kernel]                            [k] _aesni_enc1
>
> Total throughput is 127Mbps or so.  This is with your patch applied to 5.8.0+
> kernel (it applied clean with 'git am')
>
> Is there a good way to verify at runtime that I've properly applied your patch?
>
> On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
> and perf-top shows:
>
>    13.33%  [kernel]                            [k] acpi_idle_do_entry
>     9.21%  [kernel]                            [k] _aesni_enc1
>     4.49%  [unknown]                           [.] 0x00007fbc3f00adb6
>     4.34%  [unknown]                           [.] 0x00007fbc3f00adba
>     3.85%  [kernel]                            [k] memcpy
>
>
> So, new patch is not working that well for me...
>

That is odd. The net number of invocations of kernel_fpu_begin()
should be the same, so I cannot explain why they suddenly take more
time. I am starting to think that this is a different issue
altogether.

One thing that you could try is dropping the '.cra_alignmask' line as
we don't actually need it, but I am skeptical that this is the cause
of this.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto: aesni - add ccm(aes) algorithm implementation
  2020-12-01  6:32   ` Ard Biesheuvel
@ 2020-12-01 14:26     ` Ben Greear
  0 siblings, 0 replies; 5+ messages in thread
From: Ben Greear @ 2020-12-01 14:26 UTC (permalink / raw)
  To: Ard Biesheuvel; +Cc: Linux Crypto Mailing List, Herbert Xu, Steve deRosier

On 11/30/20 10:32 PM, Ard Biesheuvel wrote:
> On Mon, 30 Nov 2020 at 23:48, Ben Greear <greearb@candelatech.com> wrote:
>>
>> On 11/29/20 10:20 AM, Ard Biesheuvel wrote:
>>> From: Steve deRosier <ardb@kernel.org>
>>>
>>> Add ccm(aes) implementation from linux-wireless mailing list (see
>>> http://permalink.gmane.org/gmane.linux.kernel.wireless.general/126679).
>>>
>>> This eliminates FPU context store/restore overhead existing in more
>>> general ccm_base(ctr(aes-aesni),aes-aesni) case in MAC calculation.
>>>
>>> Suggested-by: Ben Greear <greearb@candelatech.com>
>>> Co-developed-by: Steve deRosier <derosier@cal-sierra.com>
>>> Signed-off-by: Steve deRosier <derosier@cal-sierra.com>
>>> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
>>> ---
>>> Ben,
>>>
>>> This is almost a rewrite of the original patch, switching to the new
>>> skcipher API, using the existing SIMD helper, and drop numerous unrelated
>>> changes. The basic approach is almost identical, though, so I expect this
>>> to perform on par or perhaps slightly faster than the original.
>>>
>>> Could you please confirm with some numbers?
>>
>> I tried this on my apu2 platform, here is perf top during a TCP download using
>> rx-sw-crypt (ie, the aesni cpu decrypt path):
>>
>>     18.77%  [kernel]                            [k] acpi_idle_enter
>>     14.68%  [kernel]                            [k] kernel_fpu_begin
>>      4.45%  [kernel]                            [k] __crypto_xor
>>      3.46%  [kernel]                            [k] _aesni_enc1
>>
>> Total throughput is 127Mbps or so.  This is with your patch applied to 5.8.0+
>> kernel (it applied clean with 'git am')
>>
>> Is there a good way to verify at runtime that I've properly applied your patch?
>>
>> On my 5.4 kernel with the old version of the patch installed, I see 253Mbps throughput,
>> and perf-top shows:
>>
>>     13.33%  [kernel]                            [k] acpi_idle_do_entry
>>      9.21%  [kernel]                            [k] _aesni_enc1
>>      4.49%  [unknown]                           [.] 0x00007fbc3f00adb6
>>      4.34%  [unknown]                           [.] 0x00007fbc3f00adba
>>      3.85%  [kernel]                            [k] memcpy
>>
>>
>> So, new patch is not working that well for me...
>>
> 
> That is odd. The net number of invocations of kernel_fpu_begin()
> should be the same, so I cannot explain why they suddenly take more
> time. I am starting to think that this is a different issue
> altogether.
> 
> One thing that you could try is dropping the '.cra_alignmask' line as
> we don't actually need it, but I am skeptical that this is the cause
> of this.

Here is tcrypt output from the 5.8 kernel with your patch:

               testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[54886.223056] test 0 (160 bit key, 16 byte blocks):
[54887.222241] 723747 operations in 1 seconds (11579952 bytes)
[54887.222274] test 1 (160 bit key, 64 byte blocks):
[54888.222216] 676632 operations in 1 seconds (43304448 bytes)
[54888.222251] test 2 (160 bit key, 256 byte blocks):
[54889.222178] 485715 operations in 1 seconds (124343040 bytes)
[54889.222197] test 3 (160 bit key, 512 byte blocks):
[54890.222169] 355708 operations in 1 seconds (182122496 bytes)
[54890.222188] test 4 (160 bit key, 1024 byte blocks):
[54891.222190] 237094 operations in 1 seconds (242784256 bytes)
[54891.222210] test 5 (160 bit key, 2048 byte blocks):
[54892.222169] 151576 operations in 1 seconds (310427648 bytes)
[54892.222189] test 6 (160 bit key, 4096 byte blocks):
[54893.222182] 89871 operations in 1 seconds (368111616 bytes)
[54893.222230] test 7 (160 bit key, 8192 byte blocks):
[54894.222144] 47446 operations in 1 seconds (388677632 bytes)
[54894.232292]
                testing speed of gcm(aes) (generic-gcm-aesni) encryption
[54894.232310] test 0 (128 bit key, 16 byte blocks):
[54895.232121] 744793 operations in 1 seconds (11916688 bytes)
[54895.232139] test 1 (128 bit key, 64 byte blocks):
[54896.232147] 693209 operations in 1 seconds (44365376 bytes)
[54896.232166] test 2 (128 bit key, 256 byte blocks):
[54897.232108] 494839 operations in 1 seconds (126678784 bytes)
[54897.232127] test 3 (128 bit key, 512 byte blocks):
[54898.232129] 356805 operations in 1 seconds (182684160 bytes)
[54898.232148] test 4 (128 bit key, 1024 byte blocks):
[54899.232093] 238977 operations in 1 seconds (244712448 bytes)
[54899.232112] test 5 (128 bit key, 2048 byte blocks):
[54900.232086] 151400 operations in 1 seconds (310067200 bytes)
[54900.232107] test 6 (128 bit key, 4096 byte blocks):
[54901.232080] 88499 operations in 1 seconds (362491904 bytes)
[54901.232128] test 7 (128 bit key, 8192 byte blocks):
[54902.232073] 46508 operations in 1 seconds (380993536 bytes)
[54902.232093] test 8 (192 bit key, 16 byte blocks):
[54903.232055] 734289 operations in 1 seconds (11748624 bytes)
[54903.232074] test 9 (192 bit key, 64 byte blocks):
[54904.232046] 676257 operations in 1 seconds (43280448 bytes)
[54904.232066] test 10 (192 bit key, 256 byte blocks):
[54905.232037] 480367 operations in 1 seconds (122973952 bytes)
[54905.232057] test 11 (192 bit key, 512 byte blocks):
[54906.232028] 344775 operations in 1 seconds (176524800 bytes)
[54906.232048] test 12 (192 bit key, 1024 byte blocks):
[54907.232021] 246743 operations in 1 seconds (252664832 bytes)
[54907.232041] test 13 (192 bit key, 2048 byte blocks):
[54908.232013] 149042 operations in 1 seconds (305238016 bytes)
[54908.232033] test 14 (192 bit key, 4096 byte blocks):
[54909.232034] 83689 operations in 1 seconds (342790144 bytes)
[54909.232053] test 15 (192 bit key, 8192 byte blocks):
[54910.232004] 43424 operations in 1 seconds (355729408 bytes)
[54910.232042] test 16 (256 bit key, 16 byte blocks):
[54911.232030] 720990 operations in 1 seconds (11535840 bytes)
[54911.232050] test 17 (256 bit key, 64 byte blocks):
[54912.232006] 666866 operations in 1 seconds (42679424 bytes)
[54912.232054] test 18 (256 bit key, 256 byte blocks):
[54913.231997] 459305 operations in 1 seconds (117582080 bytes)
[54913.232018] test 19 (256 bit key, 512 byte blocks):
[54914.231958] 322779 operations in 1 seconds (165262848 bytes)
[54914.231979] test 20 (256 bit key, 1024 byte blocks):
[54915.231970] 229525 operations in 1 seconds (235033600 bytes)
[54915.231990] test 21 (256 bit key, 2048 byte blocks):
[54916.231975] 137955 operations in 1 seconds (282531840 bytes)
[54916.231995] test 22 (256 bit key, 4096 byte blocks):
[54917.231998] 75876 operations in 1 seconds (310788096 bytes)
[54917.232035] test 23 (256 bit key, 8192 byte blocks):
[54918.231938] 39803 operations in 1 seconds (326066176 bytes)
[54918.232046]
                testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) decryption
[54918.232060] test 0 (160 bit key, 16 byte blocks):
[54919.231914] 711193 operations in 1 seconds (11379088 bytes)
[54919.231933] test 1 (160 bit key, 64 byte blocks):
[54920.231912] 683171 operations in 1 seconds (43722944 bytes)
[54920.231932] test 2 (160 bit key, 256 byte blocks):
[54921.231926] 490569 operations in 1 seconds (125585664 bytes)
[54921.231946] test 3 (160 bit key, 512 byte blocks):
[54922.231904] 354731 operations in 1 seconds (181622272 bytes)
[54922.231938] test 4 (160 bit key, 1024 byte blocks):
[54923.231879] 236161 operations in 1 seconds (241828864 bytes)
[54923.231930] test 5 (160 bit key, 2048 byte blocks):
[54924.231897] 148859 operations in 1 seconds (304863232 bytes)
[54924.231917] test 6 (160 bit key, 4096 byte blocks):
[54925.231866] 87114 operations in 1 seconds (356818944 bytes)
[54925.231885] test 7 (160 bit key, 8192 byte blocks):
[54926.231888] 46273 operations in 1 seconds (379068416 bytes)
[54926.232049]
                testing speed of gcm(aes) (generic-gcm-aesni) decryption
[54926.232064] test 0 (128 bit key, 16 byte blocks):
[54927.231841] 743417 operations in 1 seconds (11894672 bytes)
[54927.231892] test 1 (128 bit key, 64 byte blocks):
[54928.231832] 708360 operations in 1 seconds (45335040 bytes)
[54928.231851] test 2 (128 bit key, 256 byte blocks):
[54929.231853] 501092 operations in 1 seconds (128279552 bytes)
[54929.231872] test 3 (128 bit key, 512 byte blocks):
[54930.231830] 362779 operations in 1 seconds (185742848 bytes)
[54930.231848] test 4 (128 bit key, 1024 byte blocks):
[54931.231808] 238285 operations in 1 seconds (244003840 bytes)
[54931.231828] test 5 (128 bit key, 2048 byte blocks):
[54932.231800] 149171 operations in 1 seconds (305502208 bytes)
[54932.231849] test 6 (128 bit key, 4096 byte blocks):
[54933.231821] 87536 operations in 1 seconds (358547456 bytes)
[54933.231841] test 7 (128 bit key, 8192 byte blocks):
[54934.231783] 46091 operations in 1 seconds (377577472 bytes)
[54934.231803] test 8 (192 bit key, 16 byte blocks):
[54935.231773] 730135 operations in 1 seconds (11682160 bytes)
[54935.231792] test 9 (192 bit key, 64 byte blocks):
[54936.231762] 694952 operations in 1 seconds (44476928 bytes)
[54936.231782] test 10 (192 bit key, 256 byte blocks):
[54937.231754] 479033 operations in 1 seconds (122632448 bytes)
[54937.231774] test 11 (192 bit key, 512 byte blocks):
[54938.231747] 339268 operations in 1 seconds (173705216 bytes)
[54938.231767] test 12 (192 bit key, 1024 byte blocks):
[54939.231744] 216619 operations in 1 seconds (221817856 bytes)
[54939.231763] test 13 (192 bit key, 2048 byte blocks):
[54940.231758] 136358 operations in 1 seconds (279261184 bytes)
[54940.231778] test 14 (192 bit key, 4096 byte blocks):
[54941.231719] 79845 operations in 1 seconds (327045120 bytes)
[54941.231756] test 15 (192 bit key, 8192 byte blocks):
[54942.231740] 42121 operations in 1 seconds (345055232 bytes)
[54942.231761] test 16 (256 bit key, 16 byte blocks):
[54943.231712] 718082 operations in 1 seconds (11489312 bytes)
[54943.231733] test 17 (256 bit key, 64 byte blocks):
[54944.231691] 677413 operations in 1 seconds (43354432 bytes)
[54944.231711] test 18 (256 bit key, 256 byte blocks):
[54945.231683] 463746 operations in 1 seconds (118718976 bytes)
[54945.231703] test 19 (256 bit key, 512 byte blocks):
[54946.231710] 321881 operations in 1 seconds (164803072 bytes)
[54946.231744] test 20 (256 bit key, 1024 byte blocks):
[54947.231667] 224947 operations in 1 seconds (230345728 bytes)
[54947.231687] test 21 (256 bit key, 2048 byte blocks):
[54948.231661] 136130 operations in 1 seconds (278794240 bytes)
[54948.231681] test 22 (256 bit key, 4096 byte blocks):
[54949.231667] 75775 operations in 1 seconds (310374400 bytes)
[54949.231701] test 23 (256 bit key, 8192 byte blocks):
[54950.231677] 39429 operations in 1 seconds (323002368 bytes)


And here is 5.4 with the old patch:

                testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[  189.151375] test 0 (160 bit key, 16 byte blocks):
[  190.150706] 813049 operations in 1 seconds (13008784 bytes)
[  190.150725] test 1 (160 bit key, 64 byte blocks):
[  191.150708] 774554 operations in 1 seconds (49571456 bytes)
[  191.150726] test 2 (160 bit key, 256 byte blocks):
[  192.150714] 532955 operations in 1 seconds (136436480 bytes)
[  192.150732] test 3 (160 bit key, 512 byte blocks):
[  193.150663] 376599 operations in 1 seconds (192818688 bytes)
[  193.150681] test 4 (160 bit key, 1024 byte blocks):
[  194.150655] 262476 operations in 1 seconds (268775424 bytes)
[  194.150703] test 5 (160 bit key, 2048 byte blocks):
[  195.150673] 160616 operations in 1 seconds (328941568 bytes)
[  195.150693] test 6 (160 bit key, 4096 byte blocks):
[  196.150667] 90413 operations in 1 seconds (370331648 bytes)
[  196.150687] test 7 (160 bit key, 8192 byte blocks):
[  197.150658] 47446 operations in 1 seconds (388677632 bytes)
[  197.150783]
                testing speed of gcm(aes) (generic-gcm-aesni) encryption
[  197.150797] test 0 (128 bit key, 16 byte blocks):
[  198.150641] 851015 operations in 1 seconds (13616240 bytes)
[  198.150660] test 1 (128 bit key, 64 byte blocks):
[  199.150629] 815656 operations in 1 seconds (52201984 bytes)
[  199.150648] test 2 (128 bit key, 256 byte blocks):
[  200.150617] 553263 operations in 1 seconds (141635328 bytes)
[  200.150675] test 3 (128 bit key, 512 byte blocks):
[  201.150611] 386949 operations in 1 seconds (198117888 bytes)
[  201.150660] test 4 (128 bit key, 1024 byte blocks):
[  202.150601] 268681 operations in 1 seconds (275129344 bytes)
[  202.150635] test 5 (128 bit key, 2048 byte blocks):
[  203.150588] 162482 operations in 1 seconds (332763136 bytes)
[  203.150607] test 6 (128 bit key, 4096 byte blocks):
[  204.150549] 92852 operations in 1 seconds (380321792 bytes)
[  204.150569] test 7 (128 bit key, 8192 byte blocks):
[  205.150571] 48214 operations in 1 seconds (394969088 bytes)
[  205.150592] test 8 (192 bit key, 16 byte blocks):
[  206.150526] 832863 operations in 1 seconds (13325808 bytes)
[  206.150546] test 9 (192 bit key, 64 byte blocks):
[  207.150545] 784489 operations in 1 seconds (50207296 bytes)
[  207.150566] test 10 (192 bit key, 256 byte blocks):
[  208.150506] 530243 operations in 1 seconds (135742208 bytes)
[  208.150526] test 11 (192 bit key, 512 byte blocks):
[  209.150506] 366099 operations in 1 seconds (187442688 bytes)
[  209.150532] test 12 (192 bit key, 1024 byte blocks):
[  210.150488] 250462 operations in 1 seconds (256473088 bytes)
[  210.150509] test 13 (192 bit key, 2048 byte blocks):
[  211.150486] 151644 operations in 1 seconds (310566912 bytes)
[  211.150507] test 14 (192 bit key, 4096 byte blocks):
[  212.150474] 84226 operations in 1 seconds (344989696 bytes)
[  212.150494] test 15 (192 bit key, 8192 byte blocks):
[  213.150560] 43609 operations in 1 seconds (357244928 bytes)
[  213.150581] test 16 (256 bit key, 16 byte blocks):
[  214.150445] 804817 operations in 1 seconds (12877072 bytes)
[  214.150464] test 17 (256 bit key, 64 byte blocks):
[  215.150447] 764872 operations in 1 seconds (48951808 bytes)
[  215.150467] test 18 (256 bit key, 256 byte blocks):
[  216.150451] 501522 operations in 1 seconds (128389632 bytes)
[  216.150471] test 19 (256 bit key, 512 byte blocks):
[  217.150463] 339614 operations in 1 seconds (173882368 bytes)
[  217.150495] test 20 (256 bit key, 1024 byte blocks):
[  218.150406] 238889 operations in 1 seconds (244622336 bytes)
[  218.150426] test 21 (256 bit key, 2048 byte blocks):
[  219.150406] 141513 operations in 1 seconds (289818624 bytes)
[  219.150426] test 22 (256 bit key, 4096 byte blocks):
[  220.150432] 77995 operations in 1 seconds (319467520 bytes)
[  220.150453] test 23 (256 bit key, 8192 byte blocks):
[  221.150410] 40279 operations in 1 seconds (329965568 bytes)
[  221.150546]
                testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) decryption
[  221.150561] test 0 (160 bit key, 16 byte blocks):
[  222.150393] 758689 operations in 1 seconds (12139024 bytes)
[  222.150426] test 1 (160 bit key, 64 byte blocks):
[  223.150351] 599877 operations in 1 seconds (38392128 bytes)
[  223.150399] test 2 (160 bit key, 256 byte blocks):
[  224.150339] 453279 operations in 1 seconds (116039424 bytes)
[  224.150360] test 3 (160 bit key, 512 byte blocks):
[  225.150367] 332659 operations in 1 seconds (170321408 bytes)
[  225.150392] test 4 (160 bit key, 1024 byte blocks):
[  226.150375] 258949 operations in 1 seconds (265163776 bytes)
[  226.150394] test 5 (160 bit key, 2048 byte blocks):
[  227.150345] 157536 operations in 1 seconds (322633728 bytes)
[  227.150382] test 6 (160 bit key, 4096 byte blocks):
[  228.150341] 89150 operations in 1 seconds (365158400 bytes)
[  228.150360] test 7 (160 bit key, 8192 byte blocks):
[  229.150291] 46679 operations in 1 seconds (382394368 bytes)
[  229.150420]
                testing speed of gcm(aes) (generic-gcm-aesni) decryption
[  229.150435] test 0 (128 bit key, 16 byte blocks):
[  230.150312] 784010 operations in 1 seconds (12544160 bytes)
[  230.150331] test 1 (128 bit key, 64 byte blocks):
[  231.150271] 616765 operations in 1 seconds (39472960 bytes)
[  231.150290] test 2 (128 bit key, 256 byte blocks):
[  232.150251] 456053 operations in 1 seconds (116749568 bytes)
[  232.150271] test 3 (128 bit key, 512 byte blocks):
[  233.150245] 339125 operations in 1 seconds (173632000 bytes)
[  233.150264] test 4 (128 bit key, 1024 byte blocks):
[  234.150251] 260288 operations in 1 seconds (266534912 bytes)
[  234.150300] test 5 (128 bit key, 2048 byte blocks):
[  235.150225] 158126 operations in 1 seconds (323842048 bytes)
[  235.150245] test 6 (128 bit key, 4096 byte blocks):
[  236.150203] 89756 operations in 1 seconds (367640576 bytes)
[  236.150222] test 7 (128 bit key, 8192 byte blocks):
[  237.150238] 46408 operations in 1 seconds (380174336 bytes)
[  237.150258] test 8 (192 bit key, 16 byte blocks):
[  238.150185] 767710 operations in 1 seconds (12283360 bytes)
[  238.150204] test 9 (192 bit key, 64 byte blocks):
[  239.150223] 602290 operations in 1 seconds (38546560 bytes)
[  239.150243] test 10 (192 bit key, 256 byte blocks):
[  240.150156] 440038 operations in 1 seconds (112649728 bytes)
[  240.150177] test 11 (192 bit key, 512 byte blocks):
[  241.150144] 321800 operations in 1 seconds (164761600 bytes)
[  241.150164] test 12 (192 bit key, 1024 byte blocks):
[  242.150137] 213059 operations in 1 seconds (218172416 bytes)
[  242.150186] test 13 (192 bit key, 2048 byte blocks):
[  243.150119] 134641 operations in 1 seconds (275744768 bytes)
[  243.150138] test 14 (192 bit key, 4096 byte blocks):
[  244.150110] 78540 operations in 1 seconds (321699840 bytes)
[  244.150131] test 15 (192 bit key, 8192 byte blocks):
[  245.150124] 41604 operations in 1 seconds (340819968 bytes)
[  245.150144] test 16 (256 bit key, 16 byte blocks):
[  246.150143] 749367 operations in 1 seconds (11989872 bytes)
[  246.150179] test 17 (256 bit key, 64 byte blocks):
[  247.150101] 584427 operations in 1 seconds (37403328 bytes)
[  247.150121] test 18 (256 bit key, 256 byte blocks):
[  248.150087] 427519 operations in 1 seconds (109444864 bytes)
[  248.150107] test 19 (256 bit key, 512 byte blocks):
[  249.150046] 309171 operations in 1 seconds (158295552 bytes)
[  249.150065] test 20 (256 bit key, 1024 byte blocks):
[  250.150058] 236908 operations in 1 seconds (242593792 bytes)
[  250.150078] test 21 (256 bit key, 2048 byte blocks):
[  251.150027] 139251 operations in 1 seconds (285186048 bytes)
[  251.150048] test 22 (256 bit key, 4096 byte blocks):
[  252.150066] 76453 operations in 1 seconds (313151488 bytes)
[  252.150118] test 23 (256 bit key, 8192 byte blocks):
[  253.150039] 39852 operations in 1 seconds (326467584 bytes)


Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-12-01 14:27 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-29 18:20 [PATCH] crypto: aesni - add ccm(aes) algorithm implementation Ard Biesheuvel
2020-11-29 18:21 ` Ard Biesheuvel
2020-11-30 22:47 ` Ben Greear
2020-12-01  6:32   ` Ard Biesheuvel
2020-12-01 14:26     ` Ben Greear

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.