linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers
@ 2017-02-01 15:35 Ard Biesheuvel
  2017-02-02 16:47 ` Matthias Brugger
  2017-02-11 10:51 ` Herbert Xu
  0 siblings, 2 replies; 3+ messages in thread
From: Ard Biesheuvel @ 2017-02-01 15:35 UTC (permalink / raw)
  To: linux-arm-kernel, linux-crypto
  Cc: will.deacon, herbert, yazen.ghannam, steve.capper, agraf,
	mbrugger, Ard Biesheuvel

The PMULL based CRC32 implementation already contains code based on the
separate, optional CRC32 instructions to fallback to when operating on
small quantities of data. We can expose these routines directly on systems
that lack the 64x64 PMULL instructions but do implement the CRC32 ones,
which makes the driver that is based solely on those CRC32 instructions
redundant. So remove it.

Note that this aligns arm64 with ARM, whose accelerated CRC32 driver
also combines the CRC32 extension based and the PMULL based versions.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---

This is a meaningful patch by itself imho, but also fixes the issue reported
by Matthias where their v4.8 based GCC does not understand the -mcpu=generic+crc
command line option, resulting in failed builds.

 arch/arm64/configs/defconfig      |   1 -
 arch/arm64/crypto/Kconfig         |   9 +-
 arch/arm64/crypto/Makefile        |   4 -
 arch/arm64/crypto/crc32-arm64.c   | 290 --------------------
 arch/arm64/crypto/crc32-ce-glue.c |  49 +++-
 5 files changed, 41 insertions(+), 312 deletions(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 33b744d54739..6fc6f5a2a6e5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 # CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
-CONFIG_CRYPTO_CRC32_ARM64=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index bed7feddfeed..d92293747d63 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -37,8 +37,8 @@ config CRYPTO_CRCT10DIF_ARM64_CE
 	select CRYPTO_HASH
 
 config CRYPTO_CRC32_ARM64_CE
-	tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
-	depends on KERNEL_MODE_NEON && CRC32
+	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
+	depends on CRC32
 	select CRYPTO_HASH
 
 config CRYPTO_AES_ARM64
@@ -71,11 +71,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
 	select CRYPTO_AES
 	select CRYPTO_SIMD
 
-config CRYPTO_CRC32_ARM64
-	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
-	depends on ARM64
-	select CRYPTO_HASH
-
 config CRYPTO_CHACHA20_NEON
 	tristate "NEON accelerated ChaCha20 symmetric cipher"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index d1ae1b9cbe70..b5edc5918c28 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -55,10 +55,6 @@ AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 
 CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 
-obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
-
-CFLAGS_crc32-arm64.o	:= -mcpu=generic+crc
-
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
deleted file mode 100644
index 6a37c3c6b11d..000000000000
--- a/arch/arm64/crypto/crc32-arm64.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
- *
- * Module based on crypto/crc32c_generic.c
- *
- * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
- * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
- *
- * Using inline assembly instead of intrinsics in order to be backwards
- * compatible with older compilers.
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/unaligned/access_ok.h>
-#include <linux/cpufeature.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-
-MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
-MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
-MODULE_LICENSE("GPL v2");
-
-#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
-
-static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
-{
-	s64 length = len;
-
-	while ((length -= sizeof(u64)) >= 0) {
-		CRC32X(crc, get_unaligned_le64(p));
-		p += sizeof(u64);
-	}
-
-	/* The following is more efficient than the straight loop */
-	if (length & sizeof(u32)) {
-		CRC32W(crc, get_unaligned_le32(p));
-		p += sizeof(u32);
-	}
-	if (length & sizeof(u16)) {
-		CRC32H(crc, get_unaligned_le16(p));
-		p += sizeof(u16);
-	}
-	if (length & sizeof(u8))
-		CRC32B(crc, *p);
-
-	return crc;
-}
-
-static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
-{
-	s64 length = len;
-
-	while ((length -= sizeof(u64)) >= 0) {
-		CRC32CX(crc, get_unaligned_le64(p));
-		p += sizeof(u64);
-	}
-
-	/* The following is more efficient than the straight loop */
-	if (length & sizeof(u32)) {
-		CRC32CW(crc, get_unaligned_le32(p));
-		p += sizeof(u32);
-	}
-	if (length & sizeof(u16)) {
-		CRC32CH(crc, get_unaligned_le16(p));
-		p += sizeof(u16);
-	}
-	if (length & sizeof(u8))
-		CRC32CB(crc, *p);
-
-	return crc;
-}
-
-#define CHKSUM_BLOCK_SIZE	1
-#define CHKSUM_DIGEST_SIZE	4
-
-struct chksum_ctx {
-	u32 key;
-};
-
-struct chksum_desc_ctx {
-	u32 crc;
-};
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = mctx->key;
-
-	return 0;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
-			 unsigned int keylen)
-{
-	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
-
-	if (keylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-	mctx->key = get_unaligned_le32(key);
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksumc_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	put_unaligned_le32(ctx->crc, out);
-	return 0;
-}
-
-static int chksumc_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	put_unaligned_le32(~ctx->crc, out);
-	return 0;
-}
-
-static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
-	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
-	return 0;
-}
-
-static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
-	put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(ctx->crc, data, len, out);
-}
-
-static int chksumc_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksumc_finup(ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
-	return __chksum_finup(mctx->key, data, length, out);
-}
-
-static int chksumc_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
-	return __chksumc_finup(mctx->key, data, length, out);
-}
-
-static int crc32_cra_init(struct crypto_tfm *tfm)
-{
-	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
-	mctx->key = 0;
-	return 0;
-}
-
-static int crc32c_cra_init(struct crypto_tfm *tfm)
-{
-	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
-	mctx->key = ~0;
-	return 0;
-}
-
-static struct shash_alg crc32_alg = {
-	.digestsize		=	CHKSUM_DIGEST_SIZE,
-	.setkey			=	chksum_setkey,
-	.init			=	chksum_init,
-	.update			=	chksum_update,
-	.final			=	chksum_final,
-	.finup			=	chksum_finup,
-	.digest			=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crc32",
-		.cra_driver_name	=	"crc32-arm64-hw",
-		.cra_priority		=	300,
-		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
-		.cra_alignmask		=	0,
-		.cra_ctxsize		=	sizeof(struct chksum_ctx),
-		.cra_module		=	THIS_MODULE,
-		.cra_init		=	crc32_cra_init,
-	}
-};
-
-static struct shash_alg crc32c_alg = {
-	.digestsize		=	CHKSUM_DIGEST_SIZE,
-	.setkey			=	chksum_setkey,
-	.init			=	chksum_init,
-	.update			=	chksumc_update,
-	.final			=	chksumc_final,
-	.finup			=	chksumc_finup,
-	.digest			=	chksumc_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crc32c",
-		.cra_driver_name	=	"crc32c-arm64-hw",
-		.cra_priority		=	300,
-		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
-		.cra_alignmask		=	0,
-		.cra_ctxsize		=	sizeof(struct chksum_ctx),
-		.cra_module		=	THIS_MODULE,
-		.cra_init		=	crc32c_cra_init,
-	}
-};
-
-static int __init crc32_mod_init(void)
-{
-	int err;
-
-	err = crypto_register_shash(&crc32_alg);
-
-	if (err)
-		return err;
-
-	err = crypto_register_shash(&crc32c_alg);
-
-	if (err) {
-		crypto_unregister_shash(&crc32_alg);
-		return err;
-	}
-
-	return 0;
-}
-
-static void __exit crc32_mod_exit(void)
-{
-	crypto_unregister_shash(&crc32_alg);
-	crypto_unregister_shash(&crc32c_alg);
-}
-
-module_cpu_feature_match(CRC32, crc32_mod_init);
-module_exit(crc32_mod_exit);
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
index 8594127d5e01..eccb1ae90064 100644
--- a/arch/arm64/crypto/crc32-ce-glue.c
+++ b/arch/arm64/crypto/crc32-ce-glue.c
@@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
 	return 0;
 }
 
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+			unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = crc32_armv8_le(*crc, data, length);
+	return 0;
+}
+
+static int crc32c_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	u32 *crc = shash_desc_ctx(desc);
+
+	*crc = crc32c_armv8_le(*crc, data, length);
+	return 0;
+}
+
 static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int length)
 {
@@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
 static struct shash_alg crc32_pmull_algs[] = { {
 	.setkey			= crc32_pmull_setkey,
 	.init			= crc32_pmull_init,
-	.update			= crc32_pmull_update,
+	.update			= crc32_update,
 	.final			= crc32_pmull_final,
 	.descsize		= sizeof(u32),
 	.digestsize		= sizeof(u32),
@@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
 }, {
 	.setkey			= crc32_pmull_setkey,
 	.init			= crc32_pmull_init,
-	.update			= crc32c_pmull_update,
+	.update			= crc32c_update,
 	.final			= crc32c_pmull_final,
 	.descsize		= sizeof(u32),
 	.digestsize		= sizeof(u32),
@@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
 
 static int __init crc32_pmull_mod_init(void)
 {
-	if (elf_hwcap & HWCAP_CRC32) {
-		fallback_crc32 = crc32_armv8_le;
-		fallback_crc32c = crc32c_armv8_le;
-	} else {
-		fallback_crc32 = crc32_le;
-		fallback_crc32c = __crc32c_le;
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
+		crc32_pmull_algs[0].update = crc32_pmull_update;
+		crc32_pmull_algs[1].update = crc32c_pmull_update;
+
+		if (elf_hwcap & HWCAP_CRC32) {
+			fallback_crc32 = crc32_armv8_le;
+			fallback_crc32c = crc32c_armv8_le;
+		} else {
+			fallback_crc32 = crc32_le;
+			fallback_crc32c = __crc32c_le;
+		}
+	} else if (!(elf_hwcap & HWCAP_CRC32)) {
+		return -ENODEV;
 	}
-
 	return crypto_register_shashes(crc32_pmull_algs,
 				       ARRAY_SIZE(crc32_pmull_algs));
 }
@@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
 				  ARRAY_SIZE(crc32_pmull_algs));
 }
 
-module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
+static const struct cpu_feature crc32_cpu_feature[] = {
+	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
+};
+MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
+
+module_init(crc32_pmull_mod_init);
 module_exit(crc32_pmull_mod_exit);
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-- 
2.7.4

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers
  2017-02-01 15:35 [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers Ard Biesheuvel
@ 2017-02-02 16:47 ` Matthias Brugger
  2017-02-11 10:51 ` Herbert Xu
  1 sibling, 0 replies; 3+ messages in thread
From: Matthias Brugger @ 2017-02-02 16:47 UTC (permalink / raw)
  To: Ard Biesheuvel, linux-arm-kernel, linux-crypto
  Cc: will.deacon, herbert, yazen.ghannam, steve.capper, agraf



On 02/01/2017 04:35 PM, Ard Biesheuvel wrote:
> The PMULL based CRC32 implementation already contains code based on the
> separate, optional CRC32 instructions to fallback to when operating on
> small quantities of data. We can expose these routines directly on systems
> that lack the 64x64 PMULL instructions but do implement the CRC32 ones,
> which makes the driver that is based solely on those CRC32 instructions
> redundant. So remove it.
>
> Note that this aligns arm64 with ARM, whose accelerated CRC32 driver
> also combines the CRC32 extension based and the PMULL based versions.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>
> This is a meaningful patch by itself imho, but also fixes the issue reported
> by Matthias where their v4.8 based GCC does not understand the -mcpu=generic+crc
> command line option, resulting in failed builds.
>

This patch fixes the issue, thanks.
Tested-by: Matthias Brugger <mbrugger@suse.com>

>  arch/arm64/configs/defconfig      |   1 -
>  arch/arm64/crypto/Kconfig         |   9 +-
>  arch/arm64/crypto/Makefile        |   4 -
>  arch/arm64/crypto/crc32-arm64.c   | 290 --------------------
>  arch/arm64/crypto/crc32-ce-glue.c |  49 +++-
>  5 files changed, 41 insertions(+), 312 deletions(-)
>
> diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
> index 33b744d54739..6fc6f5a2a6e5 100644
> --- a/arch/arm64/configs/defconfig
> +++ b/arch/arm64/configs/defconfig
> @@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
>  CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
>  CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
>  # CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
> -CONFIG_CRYPTO_CRC32_ARM64=y
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index bed7feddfeed..d92293747d63 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -37,8 +37,8 @@ config CRYPTO_CRCT10DIF_ARM64_CE
>  	select CRYPTO_HASH
>
>  config CRYPTO_CRC32_ARM64_CE
> -	tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
> -	depends on KERNEL_MODE_NEON && CRC32
> +	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
> +	depends on CRC32
>  	select CRYPTO_HASH
>
>  config CRYPTO_AES_ARM64
> @@ -71,11 +71,6 @@ config CRYPTO_AES_ARM64_NEON_BLK
>  	select CRYPTO_AES
>  	select CRYPTO_SIMD
>
> -config CRYPTO_CRC32_ARM64
> -	tristate "CRC32 and CRC32C using optional ARMv8 instructions"
> -	depends on ARM64
> -	select CRYPTO_HASH
> -
>  config CRYPTO_CHACHA20_NEON
>  	tristate "NEON accelerated ChaCha20 symmetric cipher"
>  	depends on KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index d1ae1b9cbe70..b5edc5918c28 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -55,10 +55,6 @@ AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
>
>  CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
>
> -obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
> -
> -CFLAGS_crc32-arm64.o	:= -mcpu=generic+crc
> -
>  $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
>  	$(call if_changed_rule,cc_o_c)
>
> diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c
> deleted file mode 100644
> index 6a37c3c6b11d..000000000000
> --- a/arch/arm64/crypto/crc32-arm64.c
> +++ /dev/null
> @@ -1,290 +0,0 @@
> -/*
> - * crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
> - *
> - * Module based on crypto/crc32c_generic.c
> - *
> - * CRC32 loop taken from Ed Nevill's Hadoop CRC patch
> - * http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
> - *
> - * Using inline assembly instead of intrinsics in order to be backwards
> - * compatible with older compilers.
> - *
> - * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - */
> -
> -#include <linux/unaligned/access_ok.h>
> -#include <linux/cpufeature.h>
> -#include <linux/init.h>
> -#include <linux/kernel.h>
> -#include <linux/module.h>
> -#include <linux/string.h>
> -
> -#include <crypto/internal/hash.h>
> -
> -MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
> -MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
> -MODULE_LICENSE("GPL v2");
> -
> -#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
> -
> -static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
> -{
> -	s64 length = len;
> -
> -	while ((length -= sizeof(u64)) >= 0) {
> -		CRC32X(crc, get_unaligned_le64(p));
> -		p += sizeof(u64);
> -	}
> -
> -	/* The following is more efficient than the straight loop */
> -	if (length & sizeof(u32)) {
> -		CRC32W(crc, get_unaligned_le32(p));
> -		p += sizeof(u32);
> -	}
> -	if (length & sizeof(u16)) {
> -		CRC32H(crc, get_unaligned_le16(p));
> -		p += sizeof(u16);
> -	}
> -	if (length & sizeof(u8))
> -		CRC32B(crc, *p);
> -
> -	return crc;
> -}
> -
> -static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
> -{
> -	s64 length = len;
> -
> -	while ((length -= sizeof(u64)) >= 0) {
> -		CRC32CX(crc, get_unaligned_le64(p));
> -		p += sizeof(u64);
> -	}
> -
> -	/* The following is more efficient than the straight loop */
> -	if (length & sizeof(u32)) {
> -		CRC32CW(crc, get_unaligned_le32(p));
> -		p += sizeof(u32);
> -	}
> -	if (length & sizeof(u16)) {
> -		CRC32CH(crc, get_unaligned_le16(p));
> -		p += sizeof(u16);
> -	}
> -	if (length & sizeof(u8))
> -		CRC32CB(crc, *p);
> -
> -	return crc;
> -}
> -
> -#define CHKSUM_BLOCK_SIZE	1
> -#define CHKSUM_DIGEST_SIZE	4
> -
> -struct chksum_ctx {
> -	u32 key;
> -};
> -
> -struct chksum_desc_ctx {
> -	u32 crc;
> -};
> -
> -static int chksum_init(struct shash_desc *desc)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = mctx->key;
> -
> -	return 0;
> -}
> -
> -/*
> - * Setting the seed allows arbitrary accumulators and flexible XOR policy
> - * If your algorithm starts with ~0, then XOR with ~0 before you set
> - * the seed.
> - */
> -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
> -			 unsigned int keylen)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
> -
> -	if (keylen != sizeof(mctx->key)) {
> -		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> -		return -EINVAL;
> -	}
> -	mctx->key = get_unaligned_le32(key);
> -	return 0;
> -}
> -
> -static int chksum_update(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
> -	return 0;
> -}
> -
> -static int chksumc_update(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
> -	return 0;
> -}
> -
> -static int chksum_final(struct shash_desc *desc, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	put_unaligned_le32(ctx->crc, out);
> -	return 0;
> -}
> -
> -static int chksumc_final(struct shash_desc *desc, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	put_unaligned_le32(~ctx->crc, out);
> -	return 0;
> -}
> -
> -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
> -{
> -	put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
> -	return 0;
> -}
> -
> -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
> -{
> -	put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
> -	return 0;
> -}
> -
> -static int chksum_finup(struct shash_desc *desc, const u8 *data,
> -			unsigned int len, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	return __chksum_finup(ctx->crc, data, len, out);
> -}
> -
> -static int chksumc_finup(struct shash_desc *desc, const u8 *data,
> -			unsigned int len, u8 *out)
> -{
> -	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> -
> -	return __chksumc_finup(ctx->crc, data, len, out);
> -}
> -
> -static int chksum_digest(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length, u8 *out)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -
> -	return __chksum_finup(mctx->key, data, length, out);
> -}
> -
> -static int chksumc_digest(struct shash_desc *desc, const u8 *data,
> -			 unsigned int length, u8 *out)
> -{
> -	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
> -
> -	return __chksumc_finup(mctx->key, data, length, out);
> -}
> -
> -static int crc32_cra_init(struct crypto_tfm *tfm)
> -{
> -	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
> -
> -	mctx->key = 0;
> -	return 0;
> -}
> -
> -static int crc32c_cra_init(struct crypto_tfm *tfm)
> -{
> -	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
> -
> -	mctx->key = ~0;
> -	return 0;
> -}
> -
> -static struct shash_alg crc32_alg = {
> -	.digestsize		=	CHKSUM_DIGEST_SIZE,
> -	.setkey			=	chksum_setkey,
> -	.init			=	chksum_init,
> -	.update			=	chksum_update,
> -	.final			=	chksum_final,
> -	.finup			=	chksum_finup,
> -	.digest			=	chksum_digest,
> -	.descsize		=	sizeof(struct chksum_desc_ctx),
> -	.base			=	{
> -		.cra_name		=	"crc32",
> -		.cra_driver_name	=	"crc32-arm64-hw",
> -		.cra_priority		=	300,
> -		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
> -		.cra_alignmask		=	0,
> -		.cra_ctxsize		=	sizeof(struct chksum_ctx),
> -		.cra_module		=	THIS_MODULE,
> -		.cra_init		=	crc32_cra_init,
> -	}
> -};
> -
> -static struct shash_alg crc32c_alg = {
> -	.digestsize		=	CHKSUM_DIGEST_SIZE,
> -	.setkey			=	chksum_setkey,
> -	.init			=	chksum_init,
> -	.update			=	chksumc_update,
> -	.final			=	chksumc_final,
> -	.finup			=	chksumc_finup,
> -	.digest			=	chksumc_digest,
> -	.descsize		=	sizeof(struct chksum_desc_ctx),
> -	.base			=	{
> -		.cra_name		=	"crc32c",
> -		.cra_driver_name	=	"crc32c-arm64-hw",
> -		.cra_priority		=	300,
> -		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
> -		.cra_alignmask		=	0,
> -		.cra_ctxsize		=	sizeof(struct chksum_ctx),
> -		.cra_module		=	THIS_MODULE,
> -		.cra_init		=	crc32c_cra_init,
> -	}
> -};
> -
> -static int __init crc32_mod_init(void)
> -{
> -	int err;
> -
> -	err = crypto_register_shash(&crc32_alg);
> -
> -	if (err)
> -		return err;
> -
> -	err = crypto_register_shash(&crc32c_alg);
> -
> -	if (err) {
> -		crypto_unregister_shash(&crc32_alg);
> -		return err;
> -	}
> -
> -	return 0;
> -}
> -
> -static void __exit crc32_mod_exit(void)
> -{
> -	crypto_unregister_shash(&crc32_alg);
> -	crypto_unregister_shash(&crc32c_alg);
> -}
> -
> -module_cpu_feature_match(CRC32, crc32_mod_init);
> -module_exit(crc32_mod_exit);
> diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
> index 8594127d5e01..eccb1ae90064 100644
> --- a/arch/arm64/crypto/crc32-ce-glue.c
> +++ b/arch/arm64/crypto/crc32-ce-glue.c
> @@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
>  	return 0;
>  }
>
> +static int crc32_update(struct shash_desc *desc, const u8 *data,
> +			unsigned int length)
> +{
> +	u32 *crc = shash_desc_ctx(desc);
> +
> +	*crc = crc32_armv8_le(*crc, data, length);
> +	return 0;
> +}
> +
> +static int crc32c_update(struct shash_desc *desc, const u8 *data,
> +			 unsigned int length)
> +{
> +	u32 *crc = shash_desc_ctx(desc);
> +
> +	*crc = crc32c_armv8_le(*crc, data, length);
> +	return 0;
> +}
> +
>  static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
>  			 unsigned int length)
>  {
> @@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
>  static struct shash_alg crc32_pmull_algs[] = { {
>  	.setkey			= crc32_pmull_setkey,
>  	.init			= crc32_pmull_init,
> -	.update			= crc32_pmull_update,
> +	.update			= crc32_update,
>  	.final			= crc32_pmull_final,
>  	.descsize		= sizeof(u32),
>  	.digestsize		= sizeof(u32),
> @@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
>  }, {
>  	.setkey			= crc32_pmull_setkey,
>  	.init			= crc32_pmull_init,
> -	.update			= crc32c_pmull_update,
> +	.update			= crc32c_update,
>  	.final			= crc32c_pmull_final,
>  	.descsize		= sizeof(u32),
>  	.digestsize		= sizeof(u32),
> @@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
>
>  static int __init crc32_pmull_mod_init(void)
>  {
> -	if (elf_hwcap & HWCAP_CRC32) {
> -		fallback_crc32 = crc32_armv8_le;
> -		fallback_crc32c = crc32c_armv8_le;
> -	} else {
> -		fallback_crc32 = crc32_le;
> -		fallback_crc32c = __crc32c_le;
> +	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
> +		crc32_pmull_algs[0].update = crc32_pmull_update;
> +		crc32_pmull_algs[1].update = crc32c_pmull_update;
> +
> +		if (elf_hwcap & HWCAP_CRC32) {
> +			fallback_crc32 = crc32_armv8_le;
> +			fallback_crc32c = crc32c_armv8_le;
> +		} else {
> +			fallback_crc32 = crc32_le;
> +			fallback_crc32c = __crc32c_le;
> +		}
> +	} else if (!(elf_hwcap & HWCAP_CRC32)) {
> +		return -ENODEV;
>  	}
> -
>  	return crypto_register_shashes(crc32_pmull_algs,
>  				       ARRAY_SIZE(crc32_pmull_algs));
>  }
> @@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
>  				  ARRAY_SIZE(crc32_pmull_algs));
>  }
>
> -module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
> +static const struct cpu_feature crc32_cpu_feature[] = {
> +	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
> +};
> +MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
> +
> +module_init(crc32_pmull_mod_init);
>  module_exit(crc32_pmull_mod_exit);
>
>  MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers
  2017-02-01 15:35 [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers Ard Biesheuvel
  2017-02-02 16:47 ` Matthias Brugger
@ 2017-02-11 10:51 ` Herbert Xu
  1 sibling, 0 replies; 3+ messages in thread
From: Herbert Xu @ 2017-02-11 10:51 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: linux-arm-kernel, linux-crypto, will.deacon, yazen.ghannam,
	steve.capper, agraf, mbrugger

On Wed, Feb 01, 2017 at 03:35:40PM +0000, Ard Biesheuvel wrote:
> The PMULL based CRC32 implementation already contains code based on the
> separate, optional CRC32 instructions to fallback to when operating on
> small quantities of data. We can expose these routines directly on systems
> that lack the 64x64 PMULL instructions but do implement the CRC32 ones,
> which makes the driver that is based solely on those CRC32 instructions
> redundant. So remove it.
> 
> Note that this aligns arm64 with ARM, whose accelerated CRC32 driver
> also combines the CRC32 extension based and the PMULL based versions.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

Patch applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-02-11 19:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-01 15:35 [PATCH] crypto: arm64/crc32 - merge CRC32 and PMULL instruction based drivers Ard Biesheuvel
2017-02-02 16:47 ` Matthias Brugger
2017-02-11 10:51 ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).