linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR)
@ 2017-01-26 17:17 Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 1/4] crypto: testmgr - add test cases for cbcmac(aes) Ard Biesheuvel
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 17:17 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: herbert, Ard Biesheuvel

This series is primarily directed at improving the performance and security
of CCM on the Raspberry Pi 3. This involves splitting the MAC handling of
CCM into a separate driver so that we can efficiently replace it by something
else using the ordinary algo resolution machinery.

Patch #1 adds some testcases for cbcmac(aes), which will be introduced later.

Patch #2 replaces the open coded CBC MAC hashing routines in the CCM driver
with calls to a cbcmac() hash, and implements a template for producing such
transforms. This eliminates all the fuzzy scatterwalk code as well.

Patch #3 implements cbcmac(aes) using NEON on arm64

Patch #4 is an RFC patch that implements ctr(aes) and cbcmac(aes) in a way
that is intended to eliminate observable data dependent latencies in AES
processing, by replacing the usual 16 KB of lookup tables with a single
Sbox that is prefetched before processing each block. It is 50% slower than
generic AES, but this may be acceptable in many cases.

Ard Biesheuvel (4):
  crypto: testmgr - add test cases for cbcmac(aes)
  crypto: ccm - switch to separate cbcmac driver
  crypto: arm64/aes - add NEON and Crypto Extension CBC-MAC driver
  crypto: aes - add generic time invariant AES for CTR/CCM/GCM

 arch/arm64/crypto/aes-glue.c  | 102 ++++++
 arch/arm64/crypto/aes-modes.S |  19 +
 crypto/Kconfig                |  15 +
 crypto/Makefile               |   1 +
 crypto/aes_ti.c               | 314 ++++++++++++++++
 crypto/ccm.c                  | 373 +++++++++++++-------
 crypto/testmgr.c              |   7 +
 crypto/testmgr.h              |  58 +++
 8 files changed, 753 insertions(+), 136 deletions(-)
 create mode 100644 crypto/aes_ti.c

-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/4] crypto: testmgr - add test cases for cbcmac(aes)
  2017-01-26 17:17 [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR) Ard Biesheuvel
@ 2017-01-26 17:17 ` Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver Ard Biesheuvel
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 17:17 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: herbert, Ard Biesheuvel

In preparation of splitting off the CBC-MAC transform in the CCM
driver into a separate algorithm, define some test cases for the
AES incarnation of cbcmac.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/testmgr.c |  7 +++
 crypto/testmgr.h | 58 ++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 98eb09782db8..f9c378af3907 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2514,6 +2514,13 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "cbcmac(aes)",
+		.fips_allowed = 1,
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(aes_cbcmac_tv_template)
+		}
+	}, {
 		.alg = "ccm(aes)",
 		.test = alg_test_aead,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 64595f067d72..ed6b09978611 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -3413,6 +3413,64 @@ static struct hash_testvec aes_cmac128_tv_template[] = {
 	}
 };
 
+static struct hash_testvec aes_cbcmac_tv_template[] = {
+	{
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+		.digest		= "\x3a\xd7\x7b\xb4\x0d\x7a\x36\x60"
+				  "\xa8\x9e\xca\xf3\x24\x66\xef\x97",
+		.psize		= 16,
+		.ksize		= 16,
+	}, {
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30",
+		.digest		= "\x9d\x0d\xd0\x63\xfb\xcb\x24\x43"
+				  "\xf8\xf2\x76\x03\xac\x39\xb0\x9d",
+		.psize		= 33,
+		.ksize		= 16,
+	}, {
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+				  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+				  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+				  "\xad\x2b\x41\x7b\xe6\x6c\x37",
+		.digest		= "\xc0\x71\x73\xb8\xa0\x2c\x11\x7c"
+				  "\xaf\xdc\xb2\xf8\x89\x32\xa3\x3a",
+		.psize		= 63,
+		.ksize		= 16,
+	}, {
+		.key		= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+				  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+				  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+				  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+				  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+				  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+				  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10"
+				  "\x1c",
+		.digest		= "\x6a\x4e\xdb\x21\x47\x51\xdf\x4f"
+				  "\xa8\x4d\x4c\x10\x3b\x72\x7d\xd6",
+		.psize		= 65,
+		.ksize		= 32,
+	}
+};
+
 static struct hash_testvec des3_ede_cmac64_tv_template[] = {
 /*
  * From NIST Special Publication 800-38B, Three Key TDEA
-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver
  2017-01-26 17:17 [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR) Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 1/4] crypto: testmgr - add test cases for cbcmac(aes) Ard Biesheuvel
@ 2017-01-26 17:17 ` Ard Biesheuvel
  2017-01-27  9:41   ` Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 3/4] crypto: arm64/aes - add NEON and Crypto Extension CBC-MAC driver Ard Biesheuvel
  2017-01-26 17:17 ` [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM Ard Biesheuvel
  3 siblings, 1 reply; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 17:17 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: herbert, Ard Biesheuvel

Update the generic CCM driver to defer CBC-MAC processing to a
dedicated CBC-MAC ahash transform rather than open coding this
transform (and much of the associated scatterwalk plumbing) in
the CCM driver itself.

This cleans up the code considerably, but more importantly, it allows
the use of alternative CBC-MAC implementations that don't suffer from
performance degradation due to significant setup time (e.g., the NEON
based AES code needs to load the entire S-box into SIMD registers, which
cannot be amortized over the entire input when using the AES cipher
directly)

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/Kconfig |   1 +
 crypto/ccm.c   | 373 +++++++++++++-------
 2 files changed, 238 insertions(+), 136 deletions(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 160f08e721cc..e8269d1b0282 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data"
 config CRYPTO_CCM
 	tristate "CCM support"
 	select CRYPTO_CTR
+	select CRYPTO_HASH
 	select CRYPTO_AEAD
 	help
 	  Support for Counter with CBC MAC. Required for IPsec.
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 26b924d1e582..635f11fc52e7 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -11,6 +11,7 @@
  */
 
 #include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
@@ -23,11 +24,11 @@
 
 struct ccm_instance_ctx {
 	struct crypto_skcipher_spawn ctr;
-	struct crypto_spawn cipher;
+	struct crypto_ahash_spawn mac;
 };
 
 struct crypto_ccm_ctx {
-	struct crypto_cipher *cipher;
+	struct crypto_ahash *mac;
 	struct crypto_skcipher *ctr;
 };
 
@@ -44,7 +45,6 @@ struct crypto_rfc4309_req_ctx {
 
 struct crypto_ccm_req_priv_ctx {
 	u8 odata[16];
-	u8 idata[16];
 	u8 auth_tag[16];
 	u32 ilen;
 	u32 flags;
@@ -53,6 +53,15 @@ struct crypto_ccm_req_priv_ctx {
 	struct skcipher_request skreq;
 };
 
+struct cbcmac_tfm_ctx {
+	struct crypto_cipher *child;
+};
+
+struct cbcmac_desc_ctx {
+	unsigned int len;
+	u8 dg[];
+};
+
 static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
 	struct aead_request *req)
 {
@@ -84,7 +93,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
 {
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_skcipher *ctr = ctx->ctr;
-	struct crypto_cipher *tfm = ctx->cipher;
+	struct crypto_ahash *mac = ctx->mac;
 	int err = 0;
 
 	crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
@@ -96,11 +105,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
 	if (err)
 		goto out;
 
-	crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) &
+	crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
+	crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
 				    CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(tfm, key, keylen);
-	crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) &
+	err = crypto_ahash_setkey(mac, key, keylen);
+	crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
 			      CRYPTO_TFM_RES_MASK);
 
 out:
@@ -167,119 +176,59 @@ static int format_adata(u8 *adata, unsigned int a)
 	return len;
 }
 
-static void compute_mac(struct crypto_cipher *tfm, u8 *data, int n,
-		       struct crypto_ccm_req_priv_ctx *pctx)
-{
-	unsigned int bs = 16;
-	u8 *odata = pctx->odata;
-	u8 *idata = pctx->idata;
-	int datalen, getlen;
-
-	datalen = n;
-
-	/* first time in here, block may be partially filled. */
-	getlen = bs - pctx->ilen;
-	if (datalen >= getlen) {
-		memcpy(idata + pctx->ilen, data, getlen);
-		crypto_xor(odata, idata, bs);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-		datalen -= getlen;
-		data += getlen;
-		pctx->ilen = 0;
-	}
-
-	/* now encrypt rest of data */
-	while (datalen >= bs) {
-		crypto_xor(odata, data, bs);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-
-		datalen -= bs;
-		data += bs;
-	}
-
-	/* check and see if there's leftover data that wasn't
-	 * enough to fill a block.
-	 */
-	if (datalen) {
-		memcpy(idata + pctx->ilen, data, datalen);
-		pctx->ilen += datalen;
-	}
-}
-
-static void get_data_to_compute(struct crypto_cipher *tfm,
-			       struct crypto_ccm_req_priv_ctx *pctx,
-			       struct scatterlist *sg, unsigned int len)
-{
-	struct scatter_walk walk;
-	u8 *data_src;
-	int n;
-
-	scatterwalk_start(&walk, sg);
-
-	while (len) {
-		n = scatterwalk_clamp(&walk, len);
-		if (!n) {
-			scatterwalk_start(&walk, sg_next(walk.sg));
-			n = scatterwalk_clamp(&walk, len);
-		}
-		data_src = scatterwalk_map(&walk);
-
-		compute_mac(tfm, data_src, n, pctx);
-		len -= n;
-
-		scatterwalk_unmap(data_src);
-		scatterwalk_advance(&walk, n);
-		scatterwalk_done(&walk, 0, len);
-		if (len)
-			crypto_yield(pctx->flags);
-	}
-
-	/* any leftover needs padding and then encrypted */
-	if (pctx->ilen) {
-		int padlen;
-		u8 *odata = pctx->odata;
-		u8 *idata = pctx->idata;
-
-		padlen = 16 - pctx->ilen;
-		memset(idata + pctx->ilen, 0, padlen);
-		crypto_xor(odata, idata, 16);
-		crypto_cipher_encrypt_one(tfm, odata, odata);
-		pctx->ilen = 0;
-	}
-}
-
 static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
 			   unsigned int cryptlen)
 {
+	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
-	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
-	struct crypto_cipher *cipher = ctx->cipher;
+	AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
 	unsigned int assoclen = req->assoclen;
-	u8 *odata = pctx->odata;
-	u8 *idata = pctx->idata;
-	int err;
+	struct scatterlist sg[3];
+	u8 odata[16];
+	u8 idata[16];
+	int ilen, err;
 
 	/* format control data for input */
 	err = format_input(odata, req, cryptlen);
 	if (err)
 		goto out;
 
-	/* encrypt first block to use as start in computing mac  */
-	crypto_cipher_encrypt_one(cipher, odata, odata);
+	sg_init_table(sg, 3);
+	sg_set_buf(&sg[0], odata, 16);
 
 	/* format associated data and compute into mac */
 	if (assoclen) {
-		pctx->ilen = format_adata(idata, assoclen);
-		get_data_to_compute(cipher, pctx, req->src, req->assoclen);
+		ilen = format_adata(idata, assoclen);
+		sg_set_buf(&sg[1], idata, ilen);
+		sg_chain(sg, 3, req->src);
 	} else {
-		pctx->ilen = 0;
+		ilen = 0;
+		sg_chain(sg, 2, req->src);
 	}
 
-	/* compute plaintext into mac */
-	if (cryptlen)
-		get_data_to_compute(cipher, pctx, plain, cryptlen);
+	ahash_request_set_tfm(ahreq, ctx->mac);
+	ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
+	err = crypto_ahash_init(ahreq);
+	if (err)
+		goto out;
+	err = crypto_ahash_update(ahreq);
+	if (err)
+		goto out;
 
+	/* we need to pad the MAC input to a round multiple of the block size */
+	ilen = 16 - (assoclen + ilen) % 16;
+	if (ilen < 16) {
+		memset(idata, 0, ilen);
+		sg_init_table(sg, 2);
+		sg_set_buf(&sg[0], idata, ilen);
+		sg_chain(sg, 2, plain);
+		plain = sg;
+		cryptlen += ilen;
+	}
+
+	ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
+	err = crypto_ahash_finup(ahreq);
 out:
 	return err;
 }
@@ -453,21 +402,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 	struct aead_instance *inst = aead_alg_instance(tfm);
 	struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
-	struct crypto_cipher *cipher;
+	struct crypto_ahash *mac;
 	struct crypto_skcipher *ctr;
 	unsigned long align;
 	int err;
 
-	cipher = crypto_spawn_cipher(&ictx->cipher);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
+	mac = crypto_spawn_ahash(&ictx->mac);
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
 
 	ctr = crypto_spawn_skcipher(&ictx->ctr);
 	err = PTR_ERR(ctr);
 	if (IS_ERR(ctr))
-		goto err_free_cipher;
+		goto err_free_mac;
 
-	ctx->cipher = cipher;
+	ctx->mac = mac;
 	ctx->ctr = ctr;
 
 	align = crypto_aead_alignmask(tfm);
@@ -479,8 +428,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 
 	return 0;
 
-err_free_cipher:
-	crypto_free_cipher(cipher);
+err_free_mac:
+	crypto_free_ahash(mac);
 	return err;
 }
 
@@ -488,7 +437,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
 {
 	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
 
-	crypto_free_cipher(ctx->cipher);
+	crypto_free_ahash(ctx->mac);
 	crypto_free_skcipher(ctx->ctr);
 }
 
@@ -496,7 +445,7 @@ static void crypto_ccm_free(struct aead_instance *inst)
 {
 	struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);
 
-	crypto_drop_spawn(&ctx->cipher);
+	crypto_drop_ahash(&ctx->mac);
 	crypto_drop_skcipher(&ctx->ctr);
 	kfree(inst);
 }
@@ -505,12 +454,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 				    struct rtattr **tb,
 				    const char *full_name,
 				    const char *ctr_name,
-				    const char *cipher_name)
+				    const char *mac_name)
 {
 	struct crypto_attr_type *algt;
 	struct aead_instance *inst;
 	struct skcipher_alg *ctr;
-	struct crypto_alg *cipher;
+	struct crypto_alg *mac_alg;
+	struct hash_alg_common *mac;
 	struct ccm_instance_ctx *ictx;
 	int err;
 
@@ -521,25 +471,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
 		return -EINVAL;
 
-	cipher = crypto_alg_mod_lookup(cipher_name,  CRYPTO_ALG_TYPE_CIPHER,
-				       CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
+	mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
+				  CRYPTO_ALG_TYPE_HASH,
+				  CRYPTO_ALG_TYPE_AHASH_MASK |
+				  CRYPTO_ALG_ASYNC);
+	if (IS_ERR(mac_alg))
+		return PTR_ERR(mac_alg);
 
+	mac = __crypto_hash_alg_common(mac_alg);
 	err = -EINVAL;
-	if (cipher->cra_blocksize != 16)
-		goto out_put_cipher;
+	if (mac->digestsize != 16)
+		goto out_put_mac;
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
 	err = -ENOMEM;
 	if (!inst)
-		goto out_put_cipher;
+		goto out_put_mac;
 
 	ictx = aead_instance_ctx(inst);
-
-	err = crypto_init_spawn(&ictx->cipher, cipher,
-				aead_crypto_instance(inst),
-				CRYPTO_ALG_TYPE_MASK);
+	err = crypto_init_ahash_spawn(&ictx->mac, mac,
+				      aead_crypto_instance(inst));
 	if (err)
 		goto err_free_inst;
 
@@ -548,7 +499,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 				   crypto_requires_sync(algt->type,
 							algt->mask));
 	if (err)
-		goto err_drop_cipher;
+		goto err_drop_mac;
 
 	ctr = crypto_spawn_skcipher_alg(&ictx->ctr);
 
@@ -564,16 +515,16 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "ccm_base(%s,%s)", ctr->base.cra_driver_name,
-		     cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		     mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
 		goto err_drop_ctr;
 
 	memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
 
 	inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.base.cra_priority = (cipher->cra_priority +
+	inst->alg.base.cra_priority = (mac->base.cra_priority +
 				       ctr->base.cra_priority) / 2;
 	inst->alg.base.cra_blocksize = 1;
-	inst->alg.base.cra_alignmask = cipher->cra_alignmask |
+	inst->alg.base.cra_alignmask = mac->base.cra_alignmask |
 				       ctr->base.cra_alignmask |
 				       (__alignof__(u32) - 1);
 	inst->alg.ivsize = 16;
@@ -593,23 +544,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
 	if (err)
 		goto err_drop_ctr;
 
-out_put_cipher:
-	crypto_mod_put(cipher);
+out_put_mac:
+	crypto_mod_put(mac_alg);
 	return err;
 
 err_drop_ctr:
 	crypto_drop_skcipher(&ictx->ctr);
-err_drop_cipher:
-	crypto_drop_spawn(&ictx->cipher);
+err_drop_mac:
+	crypto_drop_ahash(&ictx->mac);
 err_free_inst:
 	kfree(inst);
-	goto out_put_cipher;
+	goto out_put_mac;
 }
 
 static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	const char *cipher_name;
 	char ctr_name[CRYPTO_MAX_ALG_NAME];
+	char mac_name[CRYPTO_MAX_ALG_NAME];
 	char full_name[CRYPTO_MAX_ALG_NAME];
 
 	cipher_name = crypto_attr_alg_name(tb[1]);
@@ -620,12 +572,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
 		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
 		return -ENAMETOOLONG;
 
+	if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
+		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
 	if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
 	    CRYPTO_MAX_ALG_NAME)
 		return -ENAMETOOLONG;
 
 	return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
-					cipher_name);
+					mac_name);
 }
 
 static struct crypto_template crypto_ccm_tmpl = {
@@ -899,14 +855,156 @@ static struct crypto_template crypto_rfc4309_tmpl = {
 	.module = THIS_MODULE,
 };
 
+static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
+				     const u8 *inkey, unsigned int keylen)
+{
+	struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
+
+	return crypto_cipher_setkey(ctx->child, inkey, keylen);
+}
+
+static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
+{
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	int bs = crypto_shash_digestsize(pdesc->tfm);
+
+	memset(ctx->dg, 0, bs);
+	ctx->len = 0;
+
+	return 0;
+}
+
+static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
+				       unsigned int len)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_digestsize(parent);
+
+	while (len--) {
+		ctx->dg[ctx->len++] ^= *p++;
+
+		if (ctx->len == bs) {
+			crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg);
+			ctx->len = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_digestsize(parent);
+
+	if (ctx->len)
+		crypto_cipher_encrypt_one(tfm, out, ctx->dg);
+	else
+		memcpy(out, ctx->dg, bs);
+
+	return 0;
+}
+
+static int cbcmac_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_cipher *cipher;
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+
+	return 0;
+};
+
+static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);
+}
+
+static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct shash_instance *inst;
+	struct crypto_alg *alg;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+	if (err)
+		return err;
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	inst = shash_alloc_instance("cbcmac", alg);
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
+				shash_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto out_free_inst;
+
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = 1;
+
+	inst->alg.digestsize = alg->cra_blocksize;
+	inst->alg.descsize = sizeof(struct cbcmac_desc_ctx) +
+			     alg->cra_blocksize;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
+	inst->alg.base.cra_init = cbcmac_init_tfm;
+	inst->alg.base.cra_exit = cbcmac_exit_tfm;
+
+	inst->alg.init = crypto_cbcmac_digest_init;
+	inst->alg.update = crypto_cbcmac_digest_update;
+	inst->alg.final = crypto_cbcmac_digest_final;
+	inst->alg.setkey = crypto_cbcmac_digest_setkey;
+
+	err = shash_register_instance(tmpl, inst);
+
+out_free_inst:
+	if (err)
+		shash_free_instance(shash_crypto_instance(inst));
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct crypto_template crypto_cbcmac_tmpl = {
+	.name = "cbcmac",
+	.create = cbcmac_create,
+	.free = shash_free_instance,
+	.module = THIS_MODULE,
+};
+
 static int __init crypto_ccm_module_init(void)
 {
 	int err;
 
-	err = crypto_register_template(&crypto_ccm_base_tmpl);
+	err = crypto_register_template(&crypto_cbcmac_tmpl);
 	if (err)
 		goto out;
 
+	err = crypto_register_template(&crypto_ccm_base_tmpl);
+	if (err)
+		goto out_undo_cbcmac;
+
 	err = crypto_register_template(&crypto_ccm_tmpl);
 	if (err)
 		goto out_undo_base;
@@ -922,6 +1020,8 @@ static int __init crypto_ccm_module_init(void)
 	crypto_unregister_template(&crypto_ccm_tmpl);
 out_undo_base:
 	crypto_unregister_template(&crypto_ccm_base_tmpl);
+out_undo_cbcmac:
+	crypto_unregister_template(&crypto_cbcmac_tmpl);
 	goto out;
 }
 
@@ -930,6 +1030,7 @@ static void __exit crypto_ccm_module_exit(void)
 	crypto_unregister_template(&crypto_rfc4309_tmpl);
 	crypto_unregister_template(&crypto_ccm_tmpl);
 	crypto_unregister_template(&crypto_ccm_base_tmpl);
+	crypto_unregister_template(&crypto_cbcmac_tmpl);
 }
 
 module_init(crypto_ccm_module_init);
-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 3/4] crypto: arm64/aes - add NEON and Crypto Extension CBC-MAC driver
  2017-01-26 17:17 [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR) Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 1/4] crypto: testmgr - add test cases for cbcmac(aes) Ard Biesheuvel
  2017-01-26 17:17 ` [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver Ard Biesheuvel
@ 2017-01-26 17:17 ` Ard Biesheuvel
  2017-01-26 17:17 ` [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM Ard Biesheuvel
  3 siblings, 0 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 17:17 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: herbert, Ard Biesheuvel

On ARMv8 implementations that do not support the Crypto Extensions,
such as the Raspberry Pi 3, the CCM driver falls back to the generic
table based AES implementation to perform the MAC part of the
algorithm, which is slow and not time invariant. So add a CBCMAC
implementation to the shared glue code between NEON AES and Crypto
Extensions AES, so that it can be used instead now that the CCM
driver has been updated to look for CBCMAC implementations other
than the one it supplies itself.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/aes-glue.c  | 102 ++++++++++++++++++++
 arch/arm64/crypto/aes-modes.S |  19 ++++
 2 files changed, 121 insertions(+)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 055bc3f61138..1f29570b83e9 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -11,6 +11,7 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
+#include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>
@@ -31,6 +32,7 @@
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
+#define aes_cbcmac_update	ce_aes_cbcmac_update
 MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
@@ -44,11 +46,13 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
+#define aes_cbcmac_update	neon_aes_cbcmac_update
 MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
 MODULE_ALIAS_CRYPTO("ecb(aes)");
 MODULE_ALIAS_CRYPTO("cbc(aes)");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("cbcmac(aes)");
 #endif
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -75,11 +79,19 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
 				int rounds, int blocks, u8 const rk2[], u8 iv[],
 				int first);
 
+asmlinkage void aes_cbcmac_update(u8 const in[], u32 const rk[], int rounds,
+				  int blocks, u8 dg[]);
+
 struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx key1;
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
+struct cbcmac_desc_ctx {
+	unsigned int len;
+	u8 dg[];
+};
+
 static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			       unsigned int key_len)
 {
@@ -357,6 +369,89 @@ static struct skcipher_alg aes_algs[] = { {
 	.decrypt	= xts_decrypt,
 } };
 
+static int cbcmac_setkey(struct crypto_shash *tfm,
+			 const u8 *in_key, unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_shash_ctx(tfm);
+	int err;
+
+	err = aes_expandkey(ctx, in_key, key_len);
+	if (err)
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+	return err;
+}
+
+static int cbcmac_init(struct shash_desc *desc)
+{
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	memset(ctx->dg, 0, AES_BLOCK_SIZE);
+	ctx->len = 0;
+
+	return 0;
+}
+
+static int cbcmac_update(struct shash_desc *desc, const u8 *p,
+			 unsigned int len)
+{
+	struct crypto_aes_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+	int rounds = 6 + tctx->key_length / 4;
+
+	while (len--) {
+		ctx->dg[ctx->len++] ^= *p++;
+
+		if (ctx->len == AES_BLOCK_SIZE) {
+			int blocks = len / AES_BLOCK_SIZE;
+
+			kernel_neon_begin();
+			aes_cbcmac_update(p, tctx->key_enc, rounds, blocks,
+					  ctx->dg);
+			kernel_neon_end();
+
+			ctx->len = 0;
+			len %= AES_BLOCK_SIZE;
+			p += blocks * AES_BLOCK_SIZE;
+		}
+	}
+
+	return 0;
+}
+
+static int cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+	struct crypto_aes_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+	int rounds = 6 + tctx->key_length / 4;
+
+	if (ctx->len) {
+		kernel_neon_begin();
+		aes_cbcmac_update(NULL, tctx->key_enc, rounds, 0, ctx->dg);
+		kernel_neon_end();
+	}
+	memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg cbcmac_alg = {
+	.base.cra_name		= "cbcmac(aes)",
+	.base.cra_driver_name	= "cbcmac-aes-" MODE,
+	.base.cra_priority	= PRIO,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= AES_BLOCK_SIZE,
+	.init			= cbcmac_init,
+	.update			= cbcmac_update,
+	.final			= cbcmac_final,
+	.setkey			= cbcmac_setkey,
+	.descsize		= sizeof(struct cbcmac_desc_ctx),
+};
+
 static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
 
 static void aes_exit(void)
@@ -367,6 +462,7 @@ static void aes_exit(void)
 		if (aes_simd_algs[i])
 			simd_skcipher_free(aes_simd_algs[i]);
 
+	crypto_unregister_shash(&cbcmac_alg);
 	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 }
 
@@ -383,6 +479,10 @@ static int __init aes_init(void)
 	if (err)
 		return err;
 
+	err = crypto_register_shash(&cbcmac_alg);
+	if (err)
+		goto unregister_ciphers;
+
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
 		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
 			continue;
@@ -402,6 +502,8 @@ static int __init aes_init(void)
 
 unregister_simds:
 	aes_exit();
+unregister_ciphers:
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	return err;
 }
 
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 92b982a8b112..aa96c9691af9 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -525,3 +525,22 @@ AES_ENTRY(aes_xts_decrypt)
 	FRAME_POP
 	ret
 AES_ENDPROC(aes_xts_decrypt)
+
+	/*
+	 * aes_cbcmac_update(u8 const in[], u32 const rk[], int rounds,
+	 *		    int blocks, u8 dg[])
+	 */
+AES_ENTRY(aes_cbcmac_update)
+	ld1		{v0.16b}, [x4]			/* get iv */
+	enc_prepare	w2, x1, x5
+
+.Lcbcmacloop:
+	ld1		{v1.16b}, [x0], #16		/* get next pt block */
+	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
+	encrypt_block	v0, w2, x1, x5, w6
+	subs		w3, w3, #1
+	bne		.Lcbcmacloop
+
+	st1		{v0.16b}, [x4]			/* return iv */
+	ret
+AES_ENDPROC(aes_cbcmac_update)
-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM
  2017-01-26 17:17 [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR) Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2017-01-26 17:17 ` [PATCH 3/4] crypto: arm64/aes - add NEON and Crypto Extension CBC-MAC driver Ard Biesheuvel
@ 2017-01-26 17:17 ` Ard Biesheuvel
  2017-01-26 18:35   ` Krzysztof Kwiatkowski
  3 siblings, 1 reply; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 17:17 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: herbert, Ard Biesheuvel

Lookup table based AES is sensitive to timing attacks, which is
due to the fact that such table lookups are data dependent, and
the fact that 8 KB worth of tables covers a significant number of
cachelines on any architecture.

For network facing algorithms such as CTR, CCM or GCM, this presents
a security risk, which is why arch specific AES ports are typically
time invariant, either through the use of special instructions, or
by using SIMD algorithms that don't rely on table lookups.

For generic code, this is difficult to achieve without losing too
much performance, but we can improve the situation significantly by
switching to an implementation that only needs 256 bytes of table
data (the actual S-box itself), which can be prefetched at the start
of each block to eliminate data dependent latencies.

Note that this only implements AES encryption, which is all we need
for CTR and CBC-MAC. AES decryption can easily be implemented in a
similar way, but is significantly more costly.

This code runs at ~25 cycles per byte on ARM Cortex-A57 (while the
ordinary generic AES driver manages 18 cycles per byte on this
hardware).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/Kconfig  |  14 +
 crypto/Makefile |   1 +
 crypto/aes_ti.c | 314 ++++++++++++++++++++
 3 files changed, 329 insertions(+)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index e8269d1b0282..ce1f6be9e48f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -896,6 +896,20 @@ config CRYPTO_AES
 
 	  See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.
 
+config CRYPTO_AES_TI
+	tristate "Generic time invariant AES in CTR and CBC-MAC modes"
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_HASH
+	select CRYPTO_AES
+	help
+	  This is a time invariant generic implementation of AES in CTR and
+	  CBC-MAC modes, intended for use by the generic CCM and GCM drivers,
+	  and other CTR based modes. Instead of using 8 lookup tables of 1 KB
+	  each, both for encryption and decryption, this implementation only
+	  uses a single S-box of 256 bytes, and attempts to eliminate data
+	  dependent latencies by prefetching the entire table into the cache
+	  at the start of each block.
+
 config CRYPTO_AES_586
 	tristate "AES cipher algorithms (i586)"
 	depends on (X86 || UML_X86) && !64BIT
diff --git a/crypto/Makefile b/crypto/Makefile
index b8f0e3eb0791..bcd834536163 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
 obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
 obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
+obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
 obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
new file mode 100644
index 000000000000..5ad80e063681
--- /dev/null
+++ b/crypto/aes_ti.c
@@ -0,0 +1,314 @@
+/*
+ * Scalar (mostly) time invariant AES core transform for CTR/CCM/GCM
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+struct aes_ti_ctx {
+	u32	rk[AES_MAX_KEYLENGTH_U32];
+	int	rounds;
+};
+
+struct cbcmac_desc_ctx {
+	unsigned int len;
+	u8 dg[];
+};
+
+__weak const u8 __cacheline_aligned __aesti_sbox[] = {
+	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+static int aesti_set_key(struct aes_ti_ctx *ctx, const u8 *in_key,
+			 unsigned int key_len)
+{
+	struct crypto_aes_ctx rk;
+	int err;
+
+	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	if (err)
+		return err;
+
+	memcpy(ctx->rk, rk.key_enc, sizeof(ctx->rk));
+	ctx->rounds = 6 + key_len / 4;
+
+	/*
+	 * In order to force the compiler to emit data independent Sbox lookups
+	 * at the start of each block, xor the first round key with values at
+	 * fixed indexes in the Sbox.
+	 */
+	ctx->rk[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
+	ctx->rk[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
+	ctx->rk[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
+	ctx->rk[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
+
+	return 0;
+}
+
+static u32 mul_by_x(u32 w)
+{
+	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
+	return ((w & 0x80808080) >> 7) * 0x1b ^ ((w & 0x7f7f7f7f) << 1);
+}
+
+static u32 mix_columns(u32 x)
+{
+	u32 y = mul_by_x(x) ^ ror32(x, 16);
+
+	return y ^ ror32(x ^ y, 8);
+}
+
+static __always_inline u32 subshift(u32 in[], int pos)
+{
+	return (__aesti_sbox[in[pos] & 0xff]) ^
+	       (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
+	       (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
+}
+
+static void aesti_encrypt(struct aes_ti_ctx *ctx, u8 *out, const u8 *in)
+{
+	u32 st0[4], st1[4];
+	u32 *rkp = ctx->rk + 4;
+	int round;
+
+	st0[0] = get_unaligned_le32(in);
+	st0[1] = get_unaligned_le32(in + 4);
+	st0[2] = get_unaligned_le32(in + 8);
+	st0[3] = get_unaligned_le32(in + 12);
+
+	st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128] ^ ctx->rk[0];
+	st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160] ^ ctx->rk[1];
+	st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192] ^ ctx->rk[2];
+	st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224] ^ ctx->rk[3];
+
+	for (round = 0;; round += 2) {
+		st1[0] = mix_columns(subshift(st0, 0)) ^ *rkp++;
+		st1[1] = mix_columns(subshift(st0, 1)) ^ *rkp++;
+		st1[2] = mix_columns(subshift(st0, 2)) ^ *rkp++;
+		st1[3] = mix_columns(subshift(st0, 3)) ^ *rkp++;
+
+		if (round == ctx->rounds - 2)
+			break;
+
+		st0[0] = mix_columns(subshift(st1, 0)) ^ *rkp++;
+		st0[1] = mix_columns(subshift(st1, 1)) ^ *rkp++;
+		st0[2] = mix_columns(subshift(st1, 2)) ^ *rkp++;
+		st0[3] = mix_columns(subshift(st1, 3)) ^ *rkp++;
+	}
+
+	put_unaligned_le32(subshift(st1, 0) ^ rkp[0], out);
+	put_unaligned_le32(subshift(st1, 1) ^ rkp[1], out + 4);
+	put_unaligned_le32(subshift(st1, 2) ^ rkp[2], out + 8);
+	put_unaligned_le32(subshift(st1, 3) ^ rkp[3], out + 12);
+}
+
+static int aesti_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
+			     unsigned int key_len)
+{
+	struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = aesti_set_key(ctx, in_key, key_len);
+	if (err)
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return err;
+}
+
+static int aesti_ctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	u8 buf[AES_BLOCK_SIZE];
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, true);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			tail = walk.nbytes % AES_BLOCK_SIZE;
+		}
+
+		do {
+			int bsize = min(nbytes, AES_BLOCK_SIZE);
+
+			aesti_encrypt(ctx, buf, walk.iv);
+			if (dst != src)
+			       memcpy(dst, src, bsize);
+			crypto_xor(dst, buf, bsize);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+
+			dst += AES_BLOCK_SIZE;
+			src += AES_BLOCK_SIZE;
+			nbytes -= AES_BLOCK_SIZE;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
+
+static struct skcipher_alg ctr_alg = {
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-ti",
+	.base.cra_priority	= 100 + 1,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct aes_ti_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesti_ctr_set_key,
+	.encrypt		= aesti_ctr_encrypt,
+	.decrypt		= aesti_ctr_encrypt,
+};
+
+static int aesti_cbcmac_setkey(struct crypto_shash *tfm,
+			       const u8 *in_key, unsigned int key_len)
+{
+	struct aes_ti_ctx *ctx = crypto_shash_ctx(tfm);
+	int err;
+
+	err = aesti_set_key(ctx, in_key, key_len);
+	if (err)
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+	return err;
+}
+
+static int aesti_cbcmac_init(struct shash_desc *desc)
+{
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	memset(ctx->dg, 0, AES_BLOCK_SIZE);
+	ctx->len = 0;
+
+	return 0;
+}
+
+static int aesti_cbcmac_update(struct shash_desc *desc, const u8 *p,
+			       unsigned int len)
+{
+	struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	while (len--) {
+		ctx->dg[ctx->len++] ^= *p++;
+
+		if (ctx->len == AES_BLOCK_SIZE) {
+			aesti_encrypt(tctx, ctx->dg, ctx->dg);
+			ctx->len = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int aesti_cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+	struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	if (ctx->len)
+		aesti_encrypt(tctx, out, ctx->dg);
+	else
+		memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg cbcmac_alg = {
+	.base.cra_name		= "cbcmac(aes)",
+	.base.cra_driver_name	= "cbcmac-aes-ti",
+	.base.cra_priority	= 100 + 1,
+	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct aes_ti_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= AES_BLOCK_SIZE,
+	.init			= aesti_cbcmac_init,
+	.update			= aesti_cbcmac_update,
+	.final			= aesti_cbcmac_final,
+	.setkey			= aesti_cbcmac_setkey,
+	.descsize		= sizeof(struct cbcmac_desc_ctx),
+};
+
+static int __init aes_init(void)
+{
+	int err;
+
+	err = crypto_register_skcipher(&ctr_alg);
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&cbcmac_alg);
+	if (err)
+		crypto_unregister_skcipher(&ctr_alg);
+	return err;
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_shash(&cbcmac_alg);
+	crypto_unregister_skcipher(&ctr_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Generic time invariant AES transform in CTR and CBC-MAC modes");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("cbcmac(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM
  2017-01-26 17:17 ` [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM Ard Biesheuvel
@ 2017-01-26 18:35   ` Krzysztof Kwiatkowski
  2017-01-26 18:45     ` Ard Biesheuvel
  0 siblings, 1 reply; 8+ messages in thread
From: Krzysztof Kwiatkowski @ 2017-01-26 18:35 UTC (permalink / raw)
  To: Ard Biesheuvel, linux-crypto, linux-arm-kernel; +Cc: herbert

Ard,

This is a really interesting implementation. Is there a way to test whether
execution of this code is really constant-time? Have you done any tests
like that? Adam Langley has proposed using a modified version of valgrind
(ctgrind) for that, but I wonder if you have maybe thought about any
alternative method?


Kind regards,
Kris


On 26/01/17 17:17, Ard Biesheuvel wrote:
> Lookup table based AES is sensitive to timing attacks, which is
> due to the fact that such table lookups are data dependent, and
> the fact that 8 KB worth of tables covers a significant number of
> cachelines on any architecture.
> 
> For network facing algorithms such as CTR, CCM or GCM, this presents
> a security risk, which is why arch specific AES ports are typically
> time invariant, either through the use of special instructions, or
> by using SIMD algorithms that don't rely on table lookups.
> 
> For generic code, this is difficult to achieve without losing too
> much performance, but we can improve the situation significantly by
> switching to an implementation that only needs 256 bytes of table
> data (the actual S-box itself), which can be prefetched at the start
> of each block to eliminate data dependent latencies.
> 
> Note that this only implements AES encryption, which is all we need
> for CTR and CBC-MAC. AES decryption can easily be implemented in a
> similar way, but is significantly more costly.
> 
> This code runs at ~25 cycles per byte on ARM Cortex-A57 (while the
> ordinary generic AES driver manages 18 cycles per byte on this
> hardware).
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  crypto/Kconfig  |  14 +
>  crypto/Makefile |   1 +
>  crypto/aes_ti.c | 314 ++++++++++++++++++++
>  3 files changed, 329 insertions(+)
> 
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index e8269d1b0282..ce1f6be9e48f 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -896,6 +896,20 @@ config CRYPTO_AES
>  
>  	  See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.
>  
> +config CRYPTO_AES_TI
> +	tristate "Generic time invariant AES in CTR and CBC-MAC modes"
> +	select CRYPTO_BLKCIPHER
> +	select CRYPTO_HASH
> +	select CRYPTO_AES
> +	help
> +	  This is a time invariant generic implementation of AES in CTR and
> +	  CBC-MAC modes, intended for use by the generic CCM and GCM drivers,
> +	  and other CTR based modes. Instead of using 8 lookup tables of 1 KB
> +	  each, both for encryption and decryption, this implementation only
> +	  uses a single S-box of 256 bytes, and attempts to eliminate data
> +	  dependent latencies by prefetching the entire table into the cache
> +	  at the start of each block.
> +
>  config CRYPTO_AES_586
>  	tristate "AES cipher algorithms (i586)"
>  	depends on (X86 || UML_X86) && !64BIT
> diff --git a/crypto/Makefile b/crypto/Makefile
> index b8f0e3eb0791..bcd834536163 100644
> --- a/crypto/Makefile
> +++ b/crypto/Makefile
> @@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
>  obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
>  obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
>  obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
> +obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
>  obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
>  obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
>  obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
> diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
> new file mode 100644
> index 000000000000..5ad80e063681
> --- /dev/null
> +++ b/crypto/aes_ti.c
> @@ -0,0 +1,314 @@
> +/*
> + * Scalar (mostly) time invariant AES core transform for CTR/CCM/GCM
> + *
> + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <crypto/aes.h>
> +#include <crypto/internal/hash.h>
> +#include <crypto/internal/skcipher.h>
> +#include <linux/crypto.h>
> +#include <linux/module.h>
> +#include <asm/unaligned.h>
> +
> +struct aes_ti_ctx {
> +	u32	rk[AES_MAX_KEYLENGTH_U32];
> +	int	rounds;
> +};
> +
> +struct cbcmac_desc_ctx {
> +	unsigned int len;
> +	u8 dg[];
> +};
> +
> +__weak const u8 __cacheline_aligned __aesti_sbox[] = {
> +	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
> +	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
> +	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
> +	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
> +	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
> +	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
> +	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
> +	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
> +	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
> +	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
> +	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
> +	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
> +	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
> +	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
> +	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
> +	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
> +	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
> +	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
> +	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
> +	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
> +	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
> +	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
> +	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
> +	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
> +	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
> +	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
> +	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
> +	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
> +	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
> +	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
> +	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
> +	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
> +};
> +
> +static int aesti_set_key(struct aes_ti_ctx *ctx, const u8 *in_key,
> +			 unsigned int key_len)
> +{
> +	struct crypto_aes_ctx rk;
> +	int err;
> +
> +	err = crypto_aes_expand_key(&rk, in_key, key_len);
> +	if (err)
> +		return err;
> +
> +	memcpy(ctx->rk, rk.key_enc, sizeof(ctx->rk));
> +	ctx->rounds = 6 + key_len / 4;
> +
> +	/*
> +	 * In order to force the compiler to emit data independent Sbox lookups
> +	 * at the start of each block, xor the first round key with values at
> +	 * fixed indexes in the Sbox.
> +	 */
> +	ctx->rk[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
> +	ctx->rk[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
> +	ctx->rk[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
> +	ctx->rk[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
> +
> +	return 0;
> +}
> +
> +static u32 mul_by_x(u32 w)
> +{
> +	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
> +	return ((w & 0x80808080) >> 7) * 0x1b ^ ((w & 0x7f7f7f7f) << 1);
> +}
> +
> +static u32 mix_columns(u32 x)
> +{
> +	u32 y = mul_by_x(x) ^ ror32(x, 16);
> +
> +	return y ^ ror32(x ^ y, 8);
> +}
> +
> +static __always_inline u32 subshift(u32 in[], int pos)
> +{
> +	return (__aesti_sbox[in[pos] & 0xff]) ^
> +	       (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
> +	       (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
> +	       (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
> +}
> +
> +static void aesti_encrypt(struct aes_ti_ctx *ctx, u8 *out, const u8 *in)
> +{
> +	u32 st0[4], st1[4];
> +	u32 *rkp = ctx->rk + 4;
> +	int round;
> +
> +	st0[0] = get_unaligned_le32(in);
> +	st0[1] = get_unaligned_le32(in + 4);
> +	st0[2] = get_unaligned_le32(in + 8);
> +	st0[3] = get_unaligned_le32(in + 12);
> +
> +	st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128] ^ ctx->rk[0];
> +	st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160] ^ ctx->rk[1];
> +	st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192] ^ ctx->rk[2];
> +	st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224] ^ ctx->rk[3];
> +
> +	for (round = 0;; round += 2) {
> +		st1[0] = mix_columns(subshift(st0, 0)) ^ *rkp++;
> +		st1[1] = mix_columns(subshift(st0, 1)) ^ *rkp++;
> +		st1[2] = mix_columns(subshift(st0, 2)) ^ *rkp++;
> +		st1[3] = mix_columns(subshift(st0, 3)) ^ *rkp++;
> +
> +		if (round == ctx->rounds - 2)
> +			break;
> +
> +		st0[0] = mix_columns(subshift(st1, 0)) ^ *rkp++;
> +		st0[1] = mix_columns(subshift(st1, 1)) ^ *rkp++;
> +		st0[2] = mix_columns(subshift(st1, 2)) ^ *rkp++;
> +		st0[3] = mix_columns(subshift(st1, 3)) ^ *rkp++;
> +	}
> +
> +	put_unaligned_le32(subshift(st1, 0) ^ rkp[0], out);
> +	put_unaligned_le32(subshift(st1, 1) ^ rkp[1], out + 4);
> +	put_unaligned_le32(subshift(st1, 2) ^ rkp[2], out + 8);
> +	put_unaligned_le32(subshift(st1, 3) ^ rkp[3], out + 12);
> +}
> +
> +static int aesti_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
> +			     unsigned int key_len)
> +{
> +	struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
> +	int err;
> +
> +	err = aesti_set_key(ctx, in_key, key_len);
> +	if (err)
> +		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> +	return err;
> +}
> +
> +static int aesti_ctr_encrypt(struct skcipher_request *req)
> +{
> +	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> +	struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
> +	struct skcipher_walk walk;
> +	u8 buf[AES_BLOCK_SIZE];
> +	int err;
> +
> +	err = skcipher_walk_virt(&walk, req, true);
> +
> +	while (walk.nbytes > 0) {
> +		u8 *dst = walk.dst.virt.addr;
> +		u8 *src = walk.src.virt.addr;
> +		int nbytes = walk.nbytes;
> +		int tail = 0;
> +
> +		if (nbytes < walk.total) {
> +			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
> +			tail = walk.nbytes % AES_BLOCK_SIZE;
> +		}
> +
> +		do {
> +			int bsize = min(nbytes, AES_BLOCK_SIZE);
> +
> +			aesti_encrypt(ctx, buf, walk.iv);
> +			if (dst != src)
> +			       memcpy(dst, src, bsize);
> +			crypto_xor(dst, buf, bsize);
> +			crypto_inc(walk.iv, AES_BLOCK_SIZE);
> +
> +			dst += AES_BLOCK_SIZE;
> +			src += AES_BLOCK_SIZE;
> +			nbytes -= AES_BLOCK_SIZE;
> +		} while (nbytes > 0);
> +
> +		err = skcipher_walk_done(&walk, tail);
> +	}
> +	return err;
> +}
> +
> +static struct skcipher_alg ctr_alg = {
> +	.base.cra_name		= "ctr(aes)",
> +	.base.cra_driver_name	= "ctr-aes-ti",
> +	.base.cra_priority	= 100 + 1,
> +	.base.cra_blocksize	= 1,
> +	.base.cra_ctxsize	= sizeof(struct aes_ti_ctx),
> +	.base.cra_module	= THIS_MODULE,
> +
> +	.min_keysize		= AES_MIN_KEY_SIZE,
> +	.max_keysize		= AES_MAX_KEY_SIZE,
> +	.chunksize		= AES_BLOCK_SIZE,
> +	.ivsize			= AES_BLOCK_SIZE,
> +	.setkey			= aesti_ctr_set_key,
> +	.encrypt		= aesti_ctr_encrypt,
> +	.decrypt		= aesti_ctr_encrypt,
> +};
> +
> +static int aesti_cbcmac_setkey(struct crypto_shash *tfm,
> +			       const u8 *in_key, unsigned int key_len)
> +{
> +	struct aes_ti_ctx *ctx = crypto_shash_ctx(tfm);
> +	int err;
> +
> +	err = aesti_set_key(ctx, in_key, key_len);
> +	if (err)
> +		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> +
> +	return err;
> +}
> +
> +static int aesti_cbcmac_init(struct shash_desc *desc)
> +{
> +	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +	memset(ctx->dg, 0, AES_BLOCK_SIZE);
> +	ctx->len = 0;
> +
> +	return 0;
> +}
> +
> +static int aesti_cbcmac_update(struct shash_desc *desc, const u8 *p,
> +			       unsigned int len)
> +{
> +	struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
> +	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +	while (len--) {
> +		ctx->dg[ctx->len++] ^= *p++;
> +
> +		if (ctx->len == AES_BLOCK_SIZE) {
> +			aesti_encrypt(tctx, ctx->dg, ctx->dg);
> +			ctx->len = 0;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int aesti_cbcmac_final(struct shash_desc *desc, u8 *out)
> +{
> +	struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
> +	struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +	if (ctx->len)
> +		aesti_encrypt(tctx, out, ctx->dg);
> +	else
> +		memcpy(out, ctx->dg, AES_BLOCK_SIZE);
> +
> +	return 0;
> +}
> +
> +static struct shash_alg cbcmac_alg = {
> +	.base.cra_name		= "cbcmac(aes)",
> +	.base.cra_driver_name	= "cbcmac-aes-ti",
> +	.base.cra_priority	= 100 + 1,
> +	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
> +	.base.cra_blocksize	= 1,
> +	.base.cra_ctxsize	= sizeof(struct aes_ti_ctx),
> +	.base.cra_module	= THIS_MODULE,
> +
> +	.digestsize		= AES_BLOCK_SIZE,
> +	.init			= aesti_cbcmac_init,
> +	.update			= aesti_cbcmac_update,
> +	.final			= aesti_cbcmac_final,
> +	.setkey			= aesti_cbcmac_setkey,
> +	.descsize		= sizeof(struct cbcmac_desc_ctx),
> +};
> +
> +static int __init aes_init(void)
> +{
> +	int err;
> +
> +	err = crypto_register_skcipher(&ctr_alg);
> +	if (err)
> +		return err;
> +
> +	err = crypto_register_shash(&cbcmac_alg);
> +	if (err)
> +		crypto_unregister_skcipher(&ctr_alg);
> +	return err;
> +}
> +
> +static void __exit aes_fini(void)
> +{
> +	crypto_unregister_shash(&cbcmac_alg);
> +	crypto_unregister_skcipher(&ctr_alg);
> +}
> +
> +module_init(aes_init);
> +module_exit(aes_fini);
> +
> +MODULE_DESCRIPTION("Generic time invariant AES transform in CTR and CBC-MAC modes");
> +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
> +MODULE_LICENSE("GPL v2");
> +MODULE_ALIAS_CRYPTO("cbcmac(aes)");
> +MODULE_ALIAS_CRYPTO("ctr(aes)");
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM
  2017-01-26 18:35   ` Krzysztof Kwiatkowski
@ 2017-01-26 18:45     ` Ard Biesheuvel
  0 siblings, 0 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-26 18:45 UTC (permalink / raw)
  To: Krzysztof Kwiatkowski; +Cc: linux-crypto, linux-arm-kernel, Herbert Xu

On 26 January 2017 at 18:35, Krzysztof Kwiatkowski <kris@amongbytes.com> wrote:
> Ard,
>
> This is really interesting implementation. Is there a way to test if
> execution of this code is really constant time. Have you done any tests
> like that?

No, I haven't, and to be perfectly honest, I think it would only make
sense to do so on a loaded system, or the Sbox will be in the cache
all the time anyway.

> Adam Langley has proposed using modified version of valgrind
> (ctgrind) for that, but I wonder if you maybe thought about any
> alternative method?
>

I think it is quite feasible in the kernel to measure time spent in a
function each time it is invoked. I have never looked at ctgrind, but
if there is legitimate interest in this code, I will try to figure out
a way to find out how data dependent the latency of this algorithm is,
at least on hardware that I have access to.


>
> On 26/01/17 17:17, Ard Biesheuvel wrote:
>> Lookup table based AES is sensitive to timing attacks, which is
>> due to the fact that such table lookups are data dependent, and
>> the fact that 8 KB worth of tables covers a significant number of
>> cachelines on any architecture.
>>
>> For network facing algorithms such as CTR, CCM or GCM, this presents
>> a security risk, which is why arch specific AES ports are typically
>> time invariant, either through the use of special instructions, or
>> by using SIMD algorithms that don't rely on table lookups.
>>
>> For generic code, this is difficult to achieve without losing too
>> much performance, but we can improve the situation significantly by
>> switching to an implementation that only needs 256 bytes of table
>> data (the actual S-box itself), which can be prefetched at the start
>> of each block to eliminate data dependent latencies.
>>
>> Note that this only implements AES encryption, which is all we need
>> for CTR and CBC-MAC. AES decryption can easily be implemented in a
>> similar way, but is significantly more costly.
>>
>> This code runs at ~25 cycles per byte on ARM Cortex-A57 (while the
>> ordinary generic AES driver manages 18 cycles per byte on this
>> hardware).
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  crypto/Kconfig  |  14 +
>>  crypto/Makefile |   1 +
>>  crypto/aes_ti.c | 314 ++++++++++++++++++++
>>  3 files changed, 329 insertions(+)
>>
>> diff --git a/crypto/Kconfig b/crypto/Kconfig
>> index e8269d1b0282..ce1f6be9e48f 100644
>> --- a/crypto/Kconfig
>> +++ b/crypto/Kconfig
>> @@ -896,6 +896,20 @@ config CRYPTO_AES
>>
>>         See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.
>>
>> +config CRYPTO_AES_TI
>> +     tristate "Generic time invariant AES in CTR and CBC-MAC modes"
>> +     select CRYPTO_BLKCIPHER
>> +     select CRYPTO_HASH
>> +     select CRYPTO_AES
>> +     help
>> +       This is a time invariant generic implementation of AES in CTR and
>> +       CBC-MAC modes, intended for use by the generic CCM and GCM drivers,
>> +       and other CTR based modes. Instead of using 8 lookup tables of 1 KB
>> +       each, both for encryption and decryption, this implementation only
>> +       uses a single S-box of 256 bytes, and attempts to eliminate data
>> +       dependent latencies by prefetching the entire table into the cache
>> +       at the start of each block.
>> +
>>  config CRYPTO_AES_586
>>       tristate "AES cipher algorithms (i586)"
>>       depends on (X86 || UML_X86) && !64BIT
>> diff --git a/crypto/Makefile b/crypto/Makefile
>> index b8f0e3eb0791..bcd834536163 100644
>> --- a/crypto/Makefile
>> +++ b/crypto/Makefile
>> @@ -99,6 +99,7 @@ obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
>>  obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
>>  obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
>>  obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
>> +obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
>>  obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
>>  obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
>>  obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
>> diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
>> new file mode 100644
>> index 000000000000..5ad80e063681
>> --- /dev/null
>> +++ b/crypto/aes_ti.c
>> @@ -0,0 +1,314 @@
>> +/*
>> + * Scalar (mostly) time invariant AES core transform for CTR/CCM/GCM
>> + *
>> + * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <crypto/aes.h>
>> +#include <crypto/internal/hash.h>
>> +#include <crypto/internal/skcipher.h>
>> +#include <linux/crypto.h>
>> +#include <linux/module.h>
>> +#include <asm/unaligned.h>
>> +
>> +struct aes_ti_ctx {
>> +     u32     rk[AES_MAX_KEYLENGTH_U32];
>> +     int     rounds;
>> +};
>> +
>> +struct cbcmac_desc_ctx {
>> +     unsigned int len;
>> +     u8 dg[];
>> +};
>> +
>> +__weak const u8 __cacheline_aligned __aesti_sbox[] = {
>> +     0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
>> +     0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
>> +     0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
>> +     0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
>> +     0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
>> +     0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
>> +     0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
>> +     0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
>> +     0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
>> +     0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
>> +     0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
>> +     0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
>> +     0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
>> +     0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
>> +     0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
>> +     0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
>> +     0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
>> +     0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
>> +     0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
>> +     0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
>> +     0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
>> +     0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
>> +     0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
>> +     0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
>> +     0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
>> +     0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
>> +     0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
>> +     0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
>> +     0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
>> +     0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
>> +     0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
>> +     0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
>> +};
>> +
>> +static int aesti_set_key(struct aes_ti_ctx *ctx, const u8 *in_key,
>> +                      unsigned int key_len)
>> +{
>> +     struct crypto_aes_ctx rk;
>> +     int err;
>> +
>> +     err = crypto_aes_expand_key(&rk, in_key, key_len);
>> +     if (err)
>> +             return err;
>> +
>> +     memcpy(ctx->rk, rk.key_enc, sizeof(ctx->rk));
>> +     ctx->rounds = 6 + key_len / 4;
>> +
>> +     /*
>> +      * In order to force the compiler to emit data independent Sbox lookups
>> +      * at the start of each block, xor the first round key with values at
>> +      * fixed indexes in the Sbox.
>> +      */
>> +     ctx->rk[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
>> +     ctx->rk[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
>> +     ctx->rk[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
>> +     ctx->rk[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
>> +
>> +     return 0;
>> +}
>> +
>> +static u32 mul_by_x(u32 w)
>> +{
>> +     /* multiply by polynomial 'x' (0b10) in GF(2^8) */
>> +     return ((w & 0x80808080) >> 7) * 0x1b ^ ((w & 0x7f7f7f7f) << 1);
>> +}
>> +
>> +static u32 mix_columns(u32 x)
>> +{
>> +     u32 y = mul_by_x(x) ^ ror32(x, 16);
>> +
>> +     return y ^ ror32(x ^ y, 8);
>> +}
>> +
>> +static __always_inline u32 subshift(u32 in[], int pos)
>> +{
>> +     return (__aesti_sbox[in[pos] & 0xff]) ^
>> +            (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
>> +            (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
>> +            (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
>> +}
>> +
>> +static void aesti_encrypt(struct aes_ti_ctx *ctx, u8 *out, const u8 *in)
>> +{
>> +     u32 st0[4], st1[4];
>> +     u32 *rkp = ctx->rk + 4;
>> +     int round;
>> +
>> +     st0[0] = get_unaligned_le32(in);
>> +     st0[1] = get_unaligned_le32(in + 4);
>> +     st0[2] = get_unaligned_le32(in + 8);
>> +     st0[3] = get_unaligned_le32(in + 12);
>> +
>> +     st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128] ^ ctx->rk[0];
>> +     st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160] ^ ctx->rk[1];
>> +     st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192] ^ ctx->rk[2];
>> +     st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224] ^ ctx->rk[3];
>> +
>> +     for (round = 0;; round += 2) {
>> +             st1[0] = mix_columns(subshift(st0, 0)) ^ *rkp++;
>> +             st1[1] = mix_columns(subshift(st0, 1)) ^ *rkp++;
>> +             st1[2] = mix_columns(subshift(st0, 2)) ^ *rkp++;
>> +             st1[3] = mix_columns(subshift(st0, 3)) ^ *rkp++;
>> +
>> +             if (round == ctx->rounds - 2)
>> +                     break;
>> +
>> +             st0[0] = mix_columns(subshift(st1, 0)) ^ *rkp++;
>> +             st0[1] = mix_columns(subshift(st1, 1)) ^ *rkp++;
>> +             st0[2] = mix_columns(subshift(st1, 2)) ^ *rkp++;
>> +             st0[3] = mix_columns(subshift(st1, 3)) ^ *rkp++;
>> +     }
>> +
>> +     put_unaligned_le32(subshift(st1, 0) ^ rkp[0], out);
>> +     put_unaligned_le32(subshift(st1, 1) ^ rkp[1], out + 4);
>> +     put_unaligned_le32(subshift(st1, 2) ^ rkp[2], out + 8);
>> +     put_unaligned_le32(subshift(st1, 3) ^ rkp[3], out + 12);
>> +}
>> +
>> +static int aesti_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
>> +                          unsigned int key_len)
>> +{
>> +     struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
>> +     int err;
>> +
>> +     err = aesti_set_key(ctx, in_key, key_len);
>> +     if (err)
>> +             crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
>> +     return err;
>> +}
>> +
>> +static int aesti_ctr_encrypt(struct skcipher_request *req)
>> +{
>> +     struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
>> +     struct aes_ti_ctx *ctx = crypto_skcipher_ctx(tfm);
>> +     struct skcipher_walk walk;
>> +     u8 buf[AES_BLOCK_SIZE];
>> +     int err;
>> +
>> +     err = skcipher_walk_virt(&walk, req, true);
>> +
>> +     while (walk.nbytes > 0) {
>> +             u8 *dst = walk.dst.virt.addr;
>> +             u8 *src = walk.src.virt.addr;
>> +             int nbytes = walk.nbytes;
>> +             int tail = 0;
>> +
>> +             if (nbytes < walk.total) {
>> +                     nbytes = round_down(nbytes, AES_BLOCK_SIZE);
>> +                     tail = walk.nbytes % AES_BLOCK_SIZE;
>> +             }
>> +
>> +             do {
>> +                     int bsize = min(nbytes, AES_BLOCK_SIZE);
>> +
>> +                     aesti_encrypt(ctx, buf, walk.iv);
>> +                     if (dst != src)
>> +                            memcpy(dst, src, bsize);
>> +                     crypto_xor(dst, buf, bsize);
>> +                     crypto_inc(walk.iv, AES_BLOCK_SIZE);
>> +
>> +                     dst += AES_BLOCK_SIZE;
>> +                     src += AES_BLOCK_SIZE;
>> +                     nbytes -= AES_BLOCK_SIZE;
>> +             } while (nbytes > 0);
>> +
>> +             err = skcipher_walk_done(&walk, tail);
>> +     }
>> +     return err;
>> +}
>> +
>> +static struct skcipher_alg ctr_alg = {
>> +     .base.cra_name          = "ctr(aes)",
>> +     .base.cra_driver_name   = "ctr-aes-ti",
>> +     .base.cra_priority      = 100 + 1,
>> +     .base.cra_blocksize     = 1,
>> +     .base.cra_ctxsize       = sizeof(struct aes_ti_ctx),
>> +     .base.cra_module        = THIS_MODULE,
>> +
>> +     .min_keysize            = AES_MIN_KEY_SIZE,
>> +     .max_keysize            = AES_MAX_KEY_SIZE,
>> +     .chunksize              = AES_BLOCK_SIZE,
>> +     .ivsize                 = AES_BLOCK_SIZE,
>> +     .setkey                 = aesti_ctr_set_key,
>> +     .encrypt                = aesti_ctr_encrypt,
>> +     .decrypt                = aesti_ctr_encrypt,
>> +};
>> +
>> +static int aesti_cbcmac_setkey(struct crypto_shash *tfm,
>> +                            const u8 *in_key, unsigned int key_len)
>> +{
>> +     struct aes_ti_ctx *ctx = crypto_shash_ctx(tfm);
>> +     int err;
>> +
>> +     err = aesti_set_key(ctx, in_key, key_len);
>> +     if (err)
>> +             crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
>> +
>> +     return err;
>> +}
>> +
>> +static int aesti_cbcmac_init(struct shash_desc *desc)
>> +{
>> +     struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +     memset(ctx->dg, 0, AES_BLOCK_SIZE);
>> +     ctx->len = 0;
>> +
>> +     return 0;
>> +}
>> +
>> +static int aesti_cbcmac_update(struct shash_desc *desc, const u8 *p,
>> +                            unsigned int len)
>> +{
>> +     struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
>> +     struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +     while (len--) {
>> +             ctx->dg[ctx->len++] ^= *p++;
>> +
>> +             if (ctx->len == AES_BLOCK_SIZE) {
>> +                     aesti_encrypt(tctx, ctx->dg, ctx->dg);
>> +                     ctx->len = 0;
>> +             }
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +static int aesti_cbcmac_final(struct shash_desc *desc, u8 *out)
>> +{
>> +     struct aes_ti_ctx *tctx = crypto_shash_ctx(desc->tfm);
>> +     struct cbcmac_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +     if (ctx->len)
>> +             aesti_encrypt(tctx, out, ctx->dg);
>> +     else
>> +             memcpy(out, ctx->dg, AES_BLOCK_SIZE);
>> +
>> +     return 0;
>> +}
>> +
>> +static struct shash_alg cbcmac_alg = {
>> +     .base.cra_name          = "cbcmac(aes)",
>> +     .base.cra_driver_name   = "cbcmac-aes-ti",
>> +     .base.cra_priority      = 100 + 1,
>> +     .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
>> +     .base.cra_blocksize     = 1,
>> +     .base.cra_ctxsize       = sizeof(struct aes_ti_ctx),
>> +     .base.cra_module        = THIS_MODULE,
>> +
>> +     .digestsize             = AES_BLOCK_SIZE,
>> +     .init                   = aesti_cbcmac_init,
>> +     .update                 = aesti_cbcmac_update,
>> +     .final                  = aesti_cbcmac_final,
>> +     .setkey                 = aesti_cbcmac_setkey,
>> +     .descsize               = sizeof(struct cbcmac_desc_ctx),
>> +};
>> +
>> +static int __init aes_init(void)
>> +{
>> +     int err;
>> +
>> +     err = crypto_register_skcipher(&ctr_alg);
>> +     if (err)
>> +             return err;
>> +
>> +     err = crypto_register_shash(&cbcmac_alg);
>> +     if (err)
>> +             crypto_unregister_skcipher(&ctr_alg);
>> +     return err;
>> +}
>> +
>> +static void __exit aes_fini(void)
>> +{
>> +     crypto_unregister_shash(&cbcmac_alg);
>> +     crypto_unregister_skcipher(&ctr_alg);
>> +}
>> +
>> +module_init(aes_init);
>> +module_exit(aes_fini);
>> +
>> +MODULE_DESCRIPTION("Generic time invariant AES transform in CTR and CBC-MAC modes");
>> +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
>> +MODULE_LICENSE("GPL v2");
>> +MODULE_ALIAS_CRYPTO("cbcmac(aes)");
>> +MODULE_ALIAS_CRYPTO("ctr(aes)");
>>
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver
  2017-01-26 17:17 ` [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver Ard Biesheuvel
@ 2017-01-27  9:41   ` Ard Biesheuvel
  0 siblings, 0 replies; 8+ messages in thread
From: Ard Biesheuvel @ 2017-01-27  9:41 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel; +Cc: Herbert Xu, Ard Biesheuvel

On 26 January 2017 at 17:17, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> Update the generic CCM driver to defer CBC-MAC processing to a
> dedicated CBC-MAC ahash transform rather than open coding this
> transform (and much of the associated scatterwalk plumbing) in
> the CCM driver itself.
>
> This cleans up the code considerably, but more importantly, it allows
> the use of alternative CBC-MAC implementations that don't suffer from
> performance degradation due to significant setup time (e.g., the NEON
> based AES code needs to load the entire S-box into SIMD registers, which
> cannot be amortized over the entire input when using the AES cipher
> directly)
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  crypto/Kconfig |   1 +
>  crypto/ccm.c   | 373 +++++++++++++-------
>  2 files changed, 238 insertions(+), 136 deletions(-)
>
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index 160f08e721cc..e8269d1b0282 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data"
>  config CRYPTO_CCM
>         tristate "CCM support"
>         select CRYPTO_CTR
> +       select CRYPTO_HASH
>         select CRYPTO_AEAD
>         help
>           Support for Counter with CBC MAC. Required for IPsec.
> diff --git a/crypto/ccm.c b/crypto/ccm.c
> index 26b924d1e582..635f11fc52e7 100644
> --- a/crypto/ccm.c
> +++ b/crypto/ccm.c
> @@ -11,6 +11,7 @@
>   */
>
>  #include <crypto/internal/aead.h>
> +#include <crypto/internal/hash.h>
>  #include <crypto/internal/skcipher.h>
>  #include <crypto/scatterwalk.h>
>  #include <linux/err.h>
> @@ -23,11 +24,11 @@
>
>  struct ccm_instance_ctx {
>         struct crypto_skcipher_spawn ctr;
> -       struct crypto_spawn cipher;
> +       struct crypto_ahash_spawn mac;
>  };
>
>  struct crypto_ccm_ctx {
> -       struct crypto_cipher *cipher;
> +       struct crypto_ahash *mac;
>         struct crypto_skcipher *ctr;
>  };
>
> @@ -44,7 +45,6 @@ struct crypto_rfc4309_req_ctx {
>
>  struct crypto_ccm_req_priv_ctx {
>         u8 odata[16];
> -       u8 idata[16];
>         u8 auth_tag[16];
>         u32 ilen;

This is unused now.

>         u32 flags;
> @@ -53,6 +53,15 @@ struct crypto_ccm_req_priv_ctx {
>         struct skcipher_request skreq;
>  };
>
> +struct cbcmac_tfm_ctx {
> +       struct crypto_cipher *child;
> +};
> +
> +struct cbcmac_desc_ctx {
> +       unsigned int len;
> +       u8 dg[];
> +};
> +
>  static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
>         struct aead_request *req)
>  {
> @@ -84,7 +93,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
>  {
>         struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
>         struct crypto_skcipher *ctr = ctx->ctr;
> -       struct crypto_cipher *tfm = ctx->cipher;
> +       struct crypto_ahash *mac = ctx->mac;
>         int err = 0;
>
>         crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
> @@ -96,11 +105,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
>         if (err)
>                 goto out;
>
> -       crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
> -       crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) &
> +       crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
> +       crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
>                                     CRYPTO_TFM_REQ_MASK);
> -       err = crypto_cipher_setkey(tfm, key, keylen);
> -       crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) &
> +       err = crypto_ahash_setkey(mac, key, keylen);
> +       crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
>                               CRYPTO_TFM_RES_MASK);
>
>  out:
> @@ -167,119 +176,59 @@ static int format_adata(u8 *adata, unsigned int a)
>         return len;
>  }
>
> -static void compute_mac(struct crypto_cipher *tfm, u8 *data, int n,
> -                      struct crypto_ccm_req_priv_ctx *pctx)
> -{
> -       unsigned int bs = 16;
> -       u8 *odata = pctx->odata;
> -       u8 *idata = pctx->idata;
> -       int datalen, getlen;
> -
> -       datalen = n;
> -
> -       /* first time in here, block may be partially filled. */
> -       getlen = bs - pctx->ilen;
> -       if (datalen >= getlen) {
> -               memcpy(idata + pctx->ilen, data, getlen);
> -               crypto_xor(odata, idata, bs);
> -               crypto_cipher_encrypt_one(tfm, odata, odata);
> -               datalen -= getlen;
> -               data += getlen;
> -               pctx->ilen = 0;
> -       }
> -
> -       /* now encrypt rest of data */
> -       while (datalen >= bs) {
> -               crypto_xor(odata, data, bs);
> -               crypto_cipher_encrypt_one(tfm, odata, odata);
> -
> -               datalen -= bs;
> -               data += bs;
> -       }
> -
> -       /* check and see if there's leftover data that wasn't
> -        * enough to fill a block.
> -        */
> -       if (datalen) {
> -               memcpy(idata + pctx->ilen, data, datalen);
> -               pctx->ilen += datalen;
> -       }
> -}
> -
> -static void get_data_to_compute(struct crypto_cipher *tfm,
> -                              struct crypto_ccm_req_priv_ctx *pctx,
> -                              struct scatterlist *sg, unsigned int len)
> -{
> -       struct scatter_walk walk;
> -       u8 *data_src;
> -       int n;
> -
> -       scatterwalk_start(&walk, sg);
> -
> -       while (len) {
> -               n = scatterwalk_clamp(&walk, len);
> -               if (!n) {
> -                       scatterwalk_start(&walk, sg_next(walk.sg));
> -                       n = scatterwalk_clamp(&walk, len);
> -               }
> -               data_src = scatterwalk_map(&walk);
> -
> -               compute_mac(tfm, data_src, n, pctx);
> -               len -= n;
> -
> -               scatterwalk_unmap(data_src);
> -               scatterwalk_advance(&walk, n);
> -               scatterwalk_done(&walk, 0, len);
> -               if (len)
> -                       crypto_yield(pctx->flags);
> -       }
> -
> -       /* any leftover needs padding and then encrypted */
> -       if (pctx->ilen) {
> -               int padlen;
> -               u8 *odata = pctx->odata;
> -               u8 *idata = pctx->idata;
> -
> -               padlen = 16 - pctx->ilen;
> -               memset(idata + pctx->ilen, 0, padlen);
> -               crypto_xor(odata, idata, 16);
> -               crypto_cipher_encrypt_one(tfm, odata, odata);
> -               pctx->ilen = 0;
> -       }
> -}
> -
>  static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
>                            unsigned int cryptlen)
>  {
> +       struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
>         struct crypto_aead *aead = crypto_aead_reqtfm(req);
>         struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
> -       struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
> -       struct crypto_cipher *cipher = ctx->cipher;
> +       AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
>         unsigned int assoclen = req->assoclen;
> -       u8 *odata = pctx->odata;
> -       u8 *idata = pctx->idata;
> -       int err;
> +       struct scatterlist sg[3];
> +       u8 odata[16];
> +       u8 idata[16];
> +       int ilen, err;
>
>         /* format control data for input */
>         err = format_input(odata, req, cryptlen);
>         if (err)
>                 goto out;
>
> -       /* encrypt first block to use as start in computing mac  */
> -       crypto_cipher_encrypt_one(cipher, odata, odata);
> +       sg_init_table(sg, 3);
> +       sg_set_buf(&sg[0], odata, 16);
>
>         /* format associated data and compute into mac */
>         if (assoclen) {
> -               pctx->ilen = format_adata(idata, assoclen);
> -               get_data_to_compute(cipher, pctx, req->src, req->assoclen);
> +               ilen = format_adata(idata, assoclen);
> +               sg_set_buf(&sg[1], idata, ilen);
> +               sg_chain(sg, 3, req->src);
>         } else {
> -               pctx->ilen = 0;
> +               ilen = 0;
> +               sg_chain(sg, 2, req->src);
>         }
>
> -       /* compute plaintext into mac */
> -       if (cryptlen)
> -               get_data_to_compute(cipher, pctx, plain, cryptlen);
> +       ahash_request_set_tfm(ahreq, ctx->mac);


This needs

diff --git a/crypto/ccm.c b/crypto/ccm.c
index 635f11fc52e7..016059c0ffdc 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -208,6 +208,7 @@ static int crypto_ccm_auth(struct aead_request
*req, struct scatterlist *plain,
        }

        ahash_request_set_tfm(ahreq, ctx->mac);
+       ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL);
        ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
        err = crypto_ahash_init(ahreq);
        if (err)

here

> +       ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
> +       err = crypto_ahash_init(ahreq);
> +       if (err)
> +               goto out;
> +       err = crypto_ahash_update(ahreq);
> +       if (err)
> +               goto out;
>
> +       /* we need to pad the MAC input to a round multiple of the block size */
> +       ilen = 16 - (assoclen + ilen) % 16;
> +       if (ilen < 16) {
> +               memset(idata, 0, ilen);
> +               sg_init_table(sg, 2);
> +               sg_set_buf(&sg[0], idata, ilen);
> +               sg_chain(sg, 2, plain);
> +               plain = sg;
> +               cryptlen += ilen;
> +       }
> +
> +       ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
> +       err = crypto_ahash_finup(ahreq);
>  out:
>         return err;
>  }
> @@ -453,21 +402,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
>         struct aead_instance *inst = aead_alg_instance(tfm);
>         struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
>         struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
> -       struct crypto_cipher *cipher;
> +       struct crypto_ahash *mac;
>         struct crypto_skcipher *ctr;
>         unsigned long align;
>         int err;
>
> -       cipher = crypto_spawn_cipher(&ictx->cipher);
> -       if (IS_ERR(cipher))
> -               return PTR_ERR(cipher);
> +       mac = crypto_spawn_ahash(&ictx->mac);
> +       if (IS_ERR(mac))
> +               return PTR_ERR(mac);
>
>         ctr = crypto_spawn_skcipher(&ictx->ctr);
>         err = PTR_ERR(ctr);
>         if (IS_ERR(ctr))
> -               goto err_free_cipher;
> +               goto err_free_mac;
>
> -       ctx->cipher = cipher;
> +       ctx->mac = mac;
>         ctx->ctr = ctr;
>
>         align = crypto_aead_alignmask(tfm);
> @@ -479,8 +428,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
>
>         return 0;
>
> -err_free_cipher:
> -       crypto_free_cipher(cipher);
> +err_free_mac:
> +       crypto_free_ahash(mac);
>         return err;
>  }
>
> @@ -488,7 +437,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
>  {
>         struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
>
> -       crypto_free_cipher(ctx->cipher);
> +       crypto_free_ahash(ctx->mac);
>         crypto_free_skcipher(ctx->ctr);
>  }
>
> @@ -496,7 +445,7 @@ static void crypto_ccm_free(struct aead_instance *inst)
>  {
>         struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);
>
> -       crypto_drop_spawn(&ctx->cipher);
> +       crypto_drop_ahash(&ctx->mac);
>         crypto_drop_skcipher(&ctx->ctr);
>         kfree(inst);
>  }
> @@ -505,12 +454,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
>                                     struct rtattr **tb,
>                                     const char *full_name,
>                                     const char *ctr_name,
> -                                   const char *cipher_name)
> +                                   const char *mac_name)
>  {
>         struct crypto_attr_type *algt;
>         struct aead_instance *inst;
>         struct skcipher_alg *ctr;
> -       struct crypto_alg *cipher;
> +       struct crypto_alg *mac_alg;
> +       struct hash_alg_common *mac;
>         struct ccm_instance_ctx *ictx;
>         int err;
>
> @@ -521,25 +471,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
>         if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
>                 return -EINVAL;
>
> -       cipher = crypto_alg_mod_lookup(cipher_name,  CRYPTO_ALG_TYPE_CIPHER,
> -                                      CRYPTO_ALG_TYPE_MASK);
> -       if (IS_ERR(cipher))
> -               return PTR_ERR(cipher);
> +       mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
> +                                 CRYPTO_ALG_TYPE_HASH,
> +                                 CRYPTO_ALG_TYPE_AHASH_MASK |
> +                                 CRYPTO_ALG_ASYNC);
> +       if (IS_ERR(mac_alg))
> +               return PTR_ERR(mac_alg);
>
> +       mac = __crypto_hash_alg_common(mac_alg);
>         err = -EINVAL;
> -       if (cipher->cra_blocksize != 16)
> -               goto out_put_cipher;
> +       if (mac->digestsize != 16)
> +               goto out_put_mac;
>
>         inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
>         err = -ENOMEM;
>         if (!inst)
> -               goto out_put_cipher;
> +               goto out_put_mac;
>
>         ictx = aead_instance_ctx(inst);
> -
> -       err = crypto_init_spawn(&ictx->cipher, cipher,
> -                               aead_crypto_instance(inst),
> -                               CRYPTO_ALG_TYPE_MASK);
> +       err = crypto_init_ahash_spawn(&ictx->mac, mac,
> +                                     aead_crypto_instance(inst));
>         if (err)
>                 goto err_free_inst;
>
> @@ -548,7 +499,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
>                                    crypto_requires_sync(algt->type,
>                                                         algt->mask));
>         if (err)
> -               goto err_drop_cipher;
> +               goto err_drop_mac;
>
>         ctr = crypto_spawn_skcipher_alg(&ictx->ctr);
>
> @@ -564,16 +515,16 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
>         err = -ENAMETOOLONG;
>         if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
>                      "ccm_base(%s,%s)", ctr->base.cra_driver_name,
> -                    cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
> +                    mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
>                 goto err_drop_ctr;
>
>         memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
>
>         inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
> -       inst->alg.base.cra_priority = (cipher->cra_priority +
> +       inst->alg.base.cra_priority = (mac->base.cra_priority +
>                                        ctr->base.cra_priority) / 2;
>         inst->alg.base.cra_blocksize = 1;
> -       inst->alg.base.cra_alignmask = cipher->cra_alignmask |
> +       inst->alg.base.cra_alignmask = mac->base.cra_alignmask |
>                                        ctr->base.cra_alignmask |
>                                        (__alignof__(u32) - 1);
>         inst->alg.ivsize = 16;
> @@ -593,23 +544,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
>         if (err)
>                 goto err_drop_ctr;
>
> -out_put_cipher:
> -       crypto_mod_put(cipher);
> +out_put_mac:
> +       crypto_mod_put(mac_alg);
>         return err;
>
>  err_drop_ctr:
>         crypto_drop_skcipher(&ictx->ctr);
> -err_drop_cipher:
> -       crypto_drop_spawn(&ictx->cipher);
> +err_drop_mac:
> +       crypto_drop_ahash(&ictx->mac);
>  err_free_inst:
>         kfree(inst);
> -       goto out_put_cipher;
> +       goto out_put_mac;
>  }
>
>  static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
>  {
>         const char *cipher_name;
>         char ctr_name[CRYPTO_MAX_ALG_NAME];
> +       char mac_name[CRYPTO_MAX_ALG_NAME];
>         char full_name[CRYPTO_MAX_ALG_NAME];
>
>         cipher_name = crypto_attr_alg_name(tb[1]);
> @@ -620,12 +572,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
>                      cipher_name) >= CRYPTO_MAX_ALG_NAME)
>                 return -ENAMETOOLONG;
>
> +       if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
> +                    cipher_name) >= CRYPTO_MAX_ALG_NAME)
> +               return -ENAMETOOLONG;
> +
>         if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
>             CRYPTO_MAX_ALG_NAME)
>                 return -ENAMETOOLONG;
>
>         return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
> -                                       cipher_name);
> +                                       mac_name);
>  }
>
>  static struct crypto_template crypto_ccm_tmpl = {
> @@ -899,14 +855,156 @@ static struct crypto_template crypto_rfc4309_tmpl = {
>         .module = THIS_MODULE,
>  };
>
> +static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
> +                                    const u8 *inkey, unsigned int keylen)
> +{
> +       struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
> +
> +       return crypto_cipher_setkey(ctx->child, inkey, keylen);
> +}
> +
> +static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
> +{
> +       struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
> +       int bs = crypto_shash_digestsize(pdesc->tfm);
> +
> +       memset(ctx->dg, 0, bs);
> +       ctx->len = 0;
> +
> +       return 0;
> +}
> +
> +static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
> +                                      unsigned int len)
> +{
> +       struct crypto_shash *parent = pdesc->tfm;
> +       struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
> +       struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
> +       struct crypto_cipher *tfm = tctx->child;
> +       int bs = crypto_shash_digestsize(parent);
> +
> +       while (len--) {
> +               ctx->dg[ctx->len++] ^= *p++;
> +
> +               if (ctx->len == bs) {
> +                       crypto_cipher_encrypt_one(tfm, ctx->dg, ctx->dg);
> +                       ctx->len = 0;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
> +{
> +       struct crypto_shash *parent = pdesc->tfm;
> +       struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
> +       struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
> +       struct crypto_cipher *tfm = tctx->child;
> +       int bs = crypto_shash_digestsize(parent);
> +
> +       if (ctx->len)
> +               crypto_cipher_encrypt_one(tfm, out, ctx->dg);
> +       else
> +               memcpy(out, ctx->dg, bs);
> +
> +       return 0;
> +}
> +
> +static int cbcmac_init_tfm(struct crypto_tfm *tfm)
> +{
> +       struct crypto_cipher *cipher;
> +       struct crypto_instance *inst = (void *)tfm->__crt_alg;
> +       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
> +       struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
> +
> +       cipher = crypto_spawn_cipher(spawn);
> +       if (IS_ERR(cipher))
> +               return PTR_ERR(cipher);
> +
> +       ctx->child = cipher;
> +
> +       return 0;
> +}
> +
> +static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
> +{
> +       struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
> +       crypto_free_cipher(ctx->child);
> +}
> +
> +static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
> +{
> +       struct shash_instance *inst;
> +       struct crypto_alg *alg;
> +       int err;
> +
> +       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
> +       if (err)
> +               return err;
> +
> +       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
> +                                 CRYPTO_ALG_TYPE_MASK);
> +       if (IS_ERR(alg))
> +               return PTR_ERR(alg);
> +
> +       inst = shash_alloc_instance("cbcmac", alg);
> +       err = PTR_ERR(inst);
> +       if (IS_ERR(inst))
> +               goto out_put_alg;
> +
> +       err = crypto_init_spawn(shash_instance_ctx(inst), alg,
> +                               shash_crypto_instance(inst),
> +                               CRYPTO_ALG_TYPE_MASK);
> +       if (err)
> +               goto out_free_inst;
> +
> +       inst->alg.base.cra_priority = alg->cra_priority;
> +       inst->alg.base.cra_blocksize = 1;
> +
> +       inst->alg.digestsize = alg->cra_blocksize;
> +       inst->alg.descsize = sizeof(struct cbcmac_desc_ctx) +
> +                            alg->cra_blocksize;
> +
> +       inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
> +       inst->alg.base.cra_init = cbcmac_init_tfm;
> +       inst->alg.base.cra_exit = cbcmac_exit_tfm;
> +
> +       inst->alg.init = crypto_cbcmac_digest_init;
> +       inst->alg.update = crypto_cbcmac_digest_update;
> +       inst->alg.final = crypto_cbcmac_digest_final;
> +       inst->alg.setkey = crypto_cbcmac_digest_setkey;
> +
> +       err = shash_register_instance(tmpl, inst);
> +
> +out_free_inst:
> +       if (err)
> +               shash_free_instance(shash_crypto_instance(inst));
> +
> +out_put_alg:
> +       crypto_mod_put(alg);
> +       return err;
> +}
> +
> +static struct crypto_template crypto_cbcmac_tmpl = {
> +       .name = "cbcmac",
> +       .create = cbcmac_create,
> +       .free = shash_free_instance,
> +       .module = THIS_MODULE,
> +};
> +
>  static int __init crypto_ccm_module_init(void)
>  {
>         int err;
>
> -       err = crypto_register_template(&crypto_ccm_base_tmpl);
> +       err = crypto_register_template(&crypto_cbcmac_tmpl);
>         if (err)
>                 goto out;
>
> +       err = crypto_register_template(&crypto_ccm_base_tmpl);
> +       if (err)
> +               goto out_undo_cbcmac;
> +
>         err = crypto_register_template(&crypto_ccm_tmpl);
>         if (err)
>                 goto out_undo_base;
> @@ -922,6 +1020,8 @@ static int __init crypto_ccm_module_init(void)
>         crypto_unregister_template(&crypto_ccm_tmpl);
>  out_undo_base:
>         crypto_unregister_template(&crypto_ccm_base_tmpl);
> +out_undo_cbcmac:
> +       crypto_unregister_template(&crypto_cbcmac_tmpl);
>         goto out;
>  }
>
> @@ -930,6 +1030,7 @@ static void __exit crypto_ccm_module_exit(void)
>         crypto_unregister_template(&crypto_rfc4309_tmpl);
>         crypto_unregister_template(&crypto_ccm_tmpl);
>         crypto_unregister_template(&crypto_ccm_base_tmpl);
> +       crypto_unregister_template(&crypto_cbcmac_tmpl);
>  }
>
>  module_init(crypto_ccm_module_init);
> --
> 2.7.4
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-01-27  9:41 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-26 17:17 [PATCH 0/4] crypto: time invariant AES for CCM (and GCM/CTR) Ard Biesheuvel
2017-01-26 17:17 ` [PATCH 1/4] crypto: testmgr - add test cases for cbcmac(aes) Ard Biesheuvel
2017-01-26 17:17 ` [PATCH 2/4] crypto: ccm - switch to separate cbcmac driver Ard Biesheuvel
2017-01-27  9:41   ` Ard Biesheuvel
2017-01-26 17:17 ` [PATCH 3/4] crypto: arm64/aes - add NEON and Crypto Extension CBC-MAC driver Ard Biesheuvel
2017-01-26 17:17 ` [RFC PATCH 4/4] crypto: aes - add generic time invariant AES for CTR/CCM/GCM Ard Biesheuvel
2017-01-26 18:35   ` Krzysztof Kwiatkowski
2017-01-26 18:45     ` Ard Biesheuvel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).