* [PATCH 0/2] crypto: arm64 - expose PMULL variants as separate algos
@ 2019-01-25  9:36 Ard Biesheuvel
From: Ard Biesheuvel @ 2019-01-25  9:36 UTC (permalink / raw)
  To: linux-crypto; +Cc: ebiggers, herbert, linux-arm-kernel, Ard Biesheuvel

Both the GHASH and the CRC-T10DIF SIMD routines are based on 64-bit
polynomial multiplication, but fall back to the slower 8-bit polynomial
instructions if the optional 64-bit PMULL instruction is not implemented.

This means we can only ever test one of the two code paths on any
given system, which is unfortunate. So instead, register both versions
on hardware that supports them, and let the crypto API decide which one
is preferred based on priority. That way, both get tested, but only
the fastest available version is actually used.
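
A consumer that simply asks for "crct10dif" or "ghash" will then
transparently get the highest-priority implementation that registered
successfully. A minimal sketch of what that looks like on the kernel
side (illustrative only, error handling trimmed):

  #include <crypto/hash.h>

  static void report_crct10dif_driver(void)  /* illustrative helper */
  {
          struct crypto_shash *tfm;

          tfm = crypto_alloc_shash("crct10dif", 0, 0);
          if (IS_ERR(tfm))
                  return;

          /* prints the PMULL based driver on hardware that has it */
          pr_info("crct10dif resolved to %s\n",
                  crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));

          crypto_free_shash(tfm);
  }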

Ard Biesheuvel (2):
  crypto: arm64/crct10dif - register PMULL variants as separate algos
  crypto: arm64/ghash - register PMULL variants as separate algos

 arch/arm64/crypto/crct10dif-ce-glue.c |  54 +++++++--
 arch/arm64/crypto/ghash-ce-glue.c     | 118 +++++++++++++++-----
 2 files changed, 132 insertions(+), 40 deletions(-)

-- 
2.17.1



* [PATCH 1/2] crypto: arm64/crct10dif - register PMULL variants as separate algos
@ 2019-01-25  9:36 Ard Biesheuvel
From: Ard Biesheuvel @ 2019-01-25  9:36 UTC (permalink / raw)
  To: linux-crypto; +Cc: ebiggers, herbert, linux-arm-kernel, Ard Biesheuvel

The arm64 CRC-T10DIF implementation uses either 8-bit or 64-bit
polynomial multiplication instructions, since the latter are
faster but not mandatory in the architecture.

Since that prevents us from testing both implementations on the
same system, let's expose both to the crypto API, with priorities
reflecting that the P64 version is the preferred one when it is
available.
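
With both registered, the slower P8 variant also remains reachable by
its driver name rather than only by the generic algorithm name, so it
can still be exercised explicitly. Roughly (an illustrative fragment,
not taken from this patch):

  struct crypto_shash *tfm;

  /* bypass priority based selection and request the P8 variant */
  tfm = crypto_alloc_shash("crct10dif-arm64-neon", 0, 0);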

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/crct10dif-ce-glue.c | 54 +++++++++++++++-----
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index 567c24f3d224..242757cc6da9 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -25,8 +25,6 @@
 asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
 asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
 
-static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
-
 static int crct10dif_init(struct shash_desc *desc)
 {
 	u16 *crc = shash_desc_ctx(desc);
@@ -35,14 +33,30 @@ static int crct10dif_init(struct shash_desc *desc)
 	return 0;
 }
 
-static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+		kernel_neon_begin();
+		*crc = crc_t10dif_pmull_p8(*crc, data, length);
+		kernel_neon_end();
+	} else {
+		*crc = crc_t10dif_generic(*crc, data, length);
+	}
+
+	return 0;
+}
+
+static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
 			    unsigned int length)
 {
 	u16 *crc = shash_desc_ctx(desc);
 
 	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
 		kernel_neon_begin();
-		*crc = crc_t10dif_pmull(*crc, data, length);
+		*crc = crc_t10dif_pmull_p64(*crc, data, length);
 		kernel_neon_end();
 	} else {
 		*crc = crc_t10dif_generic(*crc, data, length);
@@ -59,10 +73,22 @@ static int crct10dif_final(struct shash_desc *desc, u8 *out)
 	return 0;
 }
 
-static struct shash_alg crc_t10dif_alg = {
+static struct shash_alg crc_t10dif_alg[] = {{
 	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
 	.init			= crct10dif_init,
-	.update			= crct10dif_update,
+	.update			= crct10dif_update_pmull_p8,
+	.final			= crct10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-arm64-neon",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+}, {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crct10dif_init,
+	.update			= crct10dif_update_pmull_p64,
 	.final			= crct10dif_final,
 	.descsize		= CRC_T10DIF_DIGEST_SIZE,
 
@@ -71,21 +97,25 @@ static struct shash_alg crc_t10dif_alg = {
 	.base.cra_priority	= 200,
 	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
 	.base.cra_module	= THIS_MODULE,
-};
+}};
 
 static int __init crc_t10dif_mod_init(void)
 {
 	if (elf_hwcap & HWCAP_PMULL)
-		crc_t10dif_pmull = crc_t10dif_pmull_p64;
+		return crypto_register_shashes(crc_t10dif_alg,
+					       ARRAY_SIZE(crc_t10dif_alg));
 	else
-		crc_t10dif_pmull = crc_t10dif_pmull_p8;
-
-	return crypto_register_shash(&crc_t10dif_alg);
+		/* only register the first array element */
+		return crypto_register_shash(crc_t10dif_alg);
 }
 
 static void __exit crc_t10dif_mod_exit(void)
 {
-	crypto_unregister_shash(&crc_t10dif_alg);
+	if (elf_hwcap & HWCAP_PMULL)
+		crypto_unregister_shashes(crc_t10dif_alg,
+					  ARRAY_SIZE(crc_t10dif_alg));
+	else
+		crypto_unregister_shash(crc_t10dif_alg);
 }
 
 module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
-- 
2.17.1



* [PATCH 2/2] crypto: arm64/ghash - register PMULL variants as separate algos
@ 2019-01-25  9:36 Ard Biesheuvel
From: Ard Biesheuvel @ 2019-01-25  9:36 UTC (permalink / raw)
  To: linux-crypto; +Cc: ebiggers, herbert, linux-arm-kernel, Ard Biesheuvel

The arm64 GHASH implementation uses either 8-bit or 64-bit
polynomial multiplication instructions, since the latter are
faster but not mandatory in the architecture.

Since that prevents us from testing both implementations on the
same system, let's expose both to the crypto API, with priorities
reflecting that the P64 version is the preferred one when it is
available.
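
Either variant can then be exercised explicitly through the shash API
by requesting its driver name ("ghash-neon" or "ghash-ce") instead of
the generic "ghash" name. A rough sketch of a one-shot digest follows;
the helper name and the key/input values are made up for illustration:

  #include <crypto/hash.h>

  static int ghash_smoke_test(void)  /* hypothetical helper */
  {
          u8 key[16] = { 0x01 }, data[16] = { 0x02 }, out[16];
          struct crypto_shash *tfm;
          int err;

          tfm = crypto_alloc_shash("ghash-neon", 0, 0);
          if (IS_ERR(tfm))
                  return PTR_ERR(tfm);

          err = crypto_shash_setkey(tfm, key, sizeof(key));
          if (!err) {
                  SHASH_DESC_ON_STACK(desc, tfm);

                  desc->tfm = tfm;
                  desc->flags = 0;
                  err = crypto_shash_digest(desc, data, sizeof(data), out);
          }
          crypto_free_shash(tfm);
          return err;
  }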

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/ghash-ce-glue.c | 118 +++++++++++++++-----
 1 file changed, 90 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 067d8937d5af..791ad422c427 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -60,10 +60,6 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
 				      struct ghash_key const *k,
 				      const char *head);
 
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
-				  struct ghash_key const *k,
-				  const char *head);
-
 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
 				  u8 ctr[], u32 const rk[], int rounds,
@@ -87,11 +83,15 @@ static int ghash_init(struct shash_desc *desc)
 }
 
 static void ghash_do_update(int blocks, u64 dg[], const char *src,
-			    struct ghash_key *key, const char *head)
+			    struct ghash_key *key, const char *head,
+			    void (*simd_update)(int blocks, u64 dg[],
+						const char *src,
+						struct ghash_key const *k,
+						const char *head))
 {
 	if (likely(may_use_simd())) {
 		kernel_neon_begin();
-		pmull_ghash_update(blocks, dg, src, key, head);
+		simd_update(blocks, dg, src, key, head);
 		kernel_neon_end();
 	} else {
 		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -119,8 +119,12 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
 /* avoid hogging the CPU for too long */
 #define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
 
-static int ghash_update(struct shash_desc *desc, const u8 *src,
-			unsigned int len)
+static int __ghash_update(struct shash_desc *desc, const u8 *src,
+			  unsigned int len,
+			  void (*simd_update)(int blocks, u64 dg[],
+					      const char *src,
+					      struct ghash_key const *k,
+					      const char *head))
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -146,7 +150,8 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 			int chunk = min(blocks, MAX_BLOCKS);
 
 			ghash_do_update(chunk, ctx->digest, src, key,
-					partial ? ctx->buf : NULL);
+					partial ? ctx->buf : NULL,
+					simd_update);
 
 			blocks -= chunk;
 			src += chunk * GHASH_BLOCK_SIZE;
@@ -158,7 +163,19 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
 	return 0;
 }
 
-static int ghash_final(struct shash_desc *desc, u8 *dst)
+static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
+			   unsigned int len)
+{
+	return __ghash_update(desc, src, len, pmull_ghash_update_p8);
+}
+
+static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
+			    unsigned int len)
+{
+	return __ghash_update(desc, src, len, pmull_ghash_update_p64);
+}
+
+static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -168,7 +185,28 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
 
-		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
+				pmull_ghash_update_p8);
+	}
+	put_unaligned_be64(ctx->digest[1], dst);
+	put_unaligned_be64(ctx->digest[0], dst + 8);
+
+	*ctx = (struct ghash_desc_ctx){};
+	return 0;
+}
+
+static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+	if (partial) {
+		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
+				pmull_ghash_update_p64);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -224,7 +262,21 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	return __ghash_setkey(key, inkey, keylen);
 }
 
-static struct shash_alg ghash_alg = {
+static struct shash_alg ghash_alg[] = {{
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-neon",
+	.base.cra_priority	= 100,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
+
+	.digestsize		= GHASH_DIGEST_SIZE,
+	.init			= ghash_init,
+	.update			= ghash_update_p8,
+	.final			= ghash_final_p8,
+	.setkey			= ghash_setkey,
+	.descsize		= sizeof(struct ghash_desc_ctx),
+}, {
 	.base.cra_name		= "ghash",
 	.base.cra_driver_name	= "ghash-ce",
 	.base.cra_priority	= 200,
@@ -234,11 +286,11 @@ static struct shash_alg ghash_alg = {
 
 	.digestsize		= GHASH_DIGEST_SIZE,
 	.init			= ghash_init,
-	.update			= ghash_update,
-	.final			= ghash_final,
+	.update			= ghash_update_p64,
+	.final			= ghash_final_p64,
 	.setkey			= ghash_setkey,
 	.descsize		= sizeof(struct ghash_desc_ctx),
-};
+}};
 
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
@@ -301,7 +353,8 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
 		int blocks = count / GHASH_BLOCK_SIZE;
 
 		ghash_do_update(blocks, dg, src, &ctx->ghash_key,
-				*buf_count ? buf : NULL);
+				*buf_count ? buf : NULL,
+				pmull_ghash_update_p64);
 
 		src += blocks * GHASH_BLOCK_SIZE;
 		count %= GHASH_BLOCK_SIZE;
@@ -345,7 +398,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
 
 	if (buf_count) {
 		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
-		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
+		ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
+				pmull_ghash_update_p64);
 	}
 }
 
@@ -358,7 +412,8 @@ static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
 	lengths.a = cpu_to_be64(req->assoclen * 8);
 	lengths.b = cpu_to_be64(cryptlen * 8);
 
-	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);
+	ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL,
+			pmull_ghash_update_p64);
 
 	put_unaligned_be64(dg[1], mac);
 	put_unaligned_be64(dg[0], mac + 8);
@@ -434,7 +489,7 @@ static int gcm_encrypt(struct aead_request *req)
 
 			ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
 					walk.dst.virt.addr, &ctx->ghash_key,
-					NULL);
+					NULL, pmull_ghash_update_p64);
 
 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % (2 * AES_BLOCK_SIZE));
@@ -469,7 +524,8 @@ static int gcm_encrypt(struct aead_request *req)
 
 		memcpy(buf, dst, nbytes);
 		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
-		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
+		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
+				pmull_ghash_update_p64);
 
 		err = skcipher_walk_done(&walk, 0);
 	}
@@ -558,7 +614,8 @@ static int gcm_decrypt(struct aead_request *req)
 			u8 *src = walk.src.virt.addr;
 
 			ghash_do_update(blocks, dg, walk.src.virt.addr,
-					&ctx->ghash_key, NULL);
+					&ctx->ghash_key, NULL,
+					pmull_ghash_update_p64);
 
 			do {
 				__aes_arm64_encrypt(ctx->aes_key.key_enc,
@@ -602,7 +659,8 @@ static int gcm_decrypt(struct aead_request *req)
 
 		memcpy(buf, src, nbytes);
 		memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
-		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head);
+		ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
+				pmull_ghash_update_p64);
 
 		crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
 			       walk.nbytes);
@@ -650,26 +708,30 @@ static int __init ghash_ce_mod_init(void)
 		return -ENODEV;
 
 	if (elf_hwcap & HWCAP_PMULL)
-		pmull_ghash_update = pmull_ghash_update_p64;
-
+		ret = crypto_register_shashes(ghash_alg,
+					      ARRAY_SIZE(ghash_alg));
 	else
-		pmull_ghash_update = pmull_ghash_update_p8;
+		/* only register the first array element */
+		ret = crypto_register_shash(ghash_alg);
 
-	ret = crypto_register_shash(&ghash_alg);
 	if (ret)
 		return ret;
 
 	if (elf_hwcap & HWCAP_PMULL) {
 		ret = crypto_register_aead(&gcm_aes_alg);
 		if (ret)
-			crypto_unregister_shash(&ghash_alg);
+			crypto_unregister_shashes(ghash_alg,
+						  ARRAY_SIZE(ghash_alg));
 	}
 	return ret;
 }
 
 static void __exit ghash_ce_mod_exit(void)
 {
-	crypto_unregister_shash(&ghash_alg);
+	if (elf_hwcap & HWCAP_PMULL)
+		crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+	else
+		crypto_unregister_shash(ghash_alg);
 	crypto_unregister_aead(&gcm_aes_alg);
 }
 
-- 
2.17.1



* Re: [PATCH 0/2] crypto: arm64 - expose PMULL variants as separate algos
@ 2019-02-01  6:51 Herbert Xu
From: Herbert Xu @ 2019-02-01  6:51 UTC (permalink / raw)
  To: Ard Biesheuvel; +Cc: ebiggers, linux-crypto, linux-arm-kernel

On Fri, Jan 25, 2019 at 10:36:25AM +0100, Ard Biesheuvel wrote:
> Both the GHASH and the CRC-T10DIF SIMD routines are based on 64-bit
> polynomial multiplication, but fall back to the slower 8-bit polynomial
> instructions if the optional 64-bit PMULL instruction is not implemented.
> 
> This means we can only ever test one of the two code paths on any
> given system, which is unfortunate. So instead, register both versions
> on hardware that supports them, and let the crypto API decide which one
> is preferred based on priority. That way, both get tested, but only
> the fastest available version is actually used.
> 
> Ard Biesheuvel (2):
>   crypto: arm64/crct10dif - register PMULL variants as separate algos
>   crypto: arm64/ghash - register PMULL variants as separate algos
> 
>  arch/arm64/crypto/crct10dif-ce-glue.c |  54 +++++++--
>  arch/arm64/crypto/ghash-ce-glue.c     | 118 +++++++++++++++-----
>  2 files changed, 132 insertions(+), 40 deletions(-)

All applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

