All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] MIPS CRC instruction support
@ 2017-10-04 10:48 Marcin Nowakowski
  2017-10-04 10:48 ` [PATCH v2 1/2] MIPS: add crc instruction support flag to elf_hwcap Marcin Nowakowski
  2017-10-04 10:48   ` Marcin Nowakowski
  0 siblings, 2 replies; 6+ messages in thread
From: Marcin Nowakowski @ 2017-10-04 10:48 UTC (permalink / raw)
  To: Linux MIPS Mailing List; +Cc: Ralf Baechle, Marcin Nowakowski

MIPSr6 architecture introduces a new CRC32(C) instruction.
The following patches add a crypto acceleration module for
crc32 and crc32c algorithms using the new instructions.

Marcin Nowakowski (2):
  MIPS: add crc instruction support flag to elf_hwcap
  MIPS: crypto: Add crc32 and crc32c hw accelerated module

 arch/mips/Kconfig                  |   4 +
 arch/mips/Makefile                 |   3 +
 arch/mips/crypto/Makefile          |   5 +
 arch/mips/crypto/crc32-mips.c      | 364 +++++++++++++++++++++++++++++++++++++
 arch/mips/include/asm/mipsregs.h   |   1 +
 arch/mips/include/uapi/asm/hwcap.h |   1 +
 arch/mips/kernel/cpu-probe.c       |   3 +
 crypto/Kconfig                     |   9 +
 8 files changed, 390 insertions(+)
 create mode 100644 arch/mips/crypto/Makefile
 create mode 100644 arch/mips/crypto/crc32-mips.c

-- 
2.7.4

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 1/2] MIPS: add crc instruction support flag to elf_hwcap
  2017-10-04 10:48 [PATCH v2 0/2] MIPS CRC instruction support Marcin Nowakowski
@ 2017-10-04 10:48 ` Marcin Nowakowski
  2017-10-04 10:48   ` Marcin Nowakowski
  1 sibling, 0 replies; 6+ messages in thread
From: Marcin Nowakowski @ 2017-10-04 10:48 UTC (permalink / raw)
  To: Linux MIPS Mailing List; +Cc: Ralf Baechle, Marcin Nowakowski

Indicate that CRC32 and CRC32C instuctions are supported by the CPU
through elf_hwcap flags.

This will be used by a follow-up commit that introduces crc32(c) crypto
acceleration modules and is required by GENERIC_CPU_AUTOPROBE feature.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
---
 arch/mips/include/asm/mipsregs.h   | 1 +
 arch/mips/include/uapi/asm/hwcap.h | 1 +
 arch/mips/kernel/cpu-probe.c       | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index a681092..9db53cc 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -664,6 +664,7 @@
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
 #define MIPS_CONF5_CA2		(_ULCAST_(1) << 14)
+#define MIPS_CONF5_CRCP		(_ULCAST_(1) << 18)
 #define MIPS_CONF5_MSAEN	(_ULCAST_(1) << 27)
 #define MIPS_CONF5_EVA		(_ULCAST_(1) << 28)
 #define MIPS_CONF5_CV		(_ULCAST_(1) << 29)
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index c7484a7..c7d2cb6 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -4,5 +4,6 @@
 /* HWCAP flags */
 #define HWCAP_MIPS_R6		(1 << 0)
 #define HWCAP_MIPS_MSA		(1 << 1)
+#define HWCAP_MIPS_CRC32	(1 << 2)
 
 #endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index cf3fd54..6b07b73 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
 	if (config5 & MIPS_CONF5_CA2)
 		c->ases |= MIPS_ASE_MIPS16E2;
 
+	if (config5 & MIPS_CONF5_CRCP)
+		elf_hwcap |= HWCAP_MIPS_CRC32;
+
 	return config5 & MIPS_CONF_M;
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/2] MIPS: crypto: Add crc32 and crc32c hw accelerated module
@ 2017-10-04 10:48   ` Marcin Nowakowski
  0 siblings, 0 replies; 6+ messages in thread
From: Marcin Nowakowski @ 2017-10-04 10:48 UTC (permalink / raw)
  To: Linux MIPS Mailing List
  Cc: Ralf Baechle, Marcin Nowakowski, linux-crypto, Herbert Xu,
	David S. Miller

This module registers crc32 and crc32c algorithms that use the
optional CRC32[bhwd] and CRC32C[bhwd] instructions in MIPSr6 cores.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-crypto@vger.kernel.org
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>

---
v2:
 - minor code refactoring as suggested by JamesH which produces
   a better assembly output for 32-bit builds
---
 arch/mips/Kconfig             |   4 +
 arch/mips/Makefile            |   3 +
 arch/mips/crypto/Makefile     |   5 +
 arch/mips/crypto/crc32-mips.c | 364 ++++++++++++++++++++++++++++++++++++++++++
 crypto/Kconfig                |   9 ++
 5 files changed, 385 insertions(+)
 create mode 100644 arch/mips/crypto/Makefile
 create mode 100644 arch/mips/crypto/crc32-mips.c

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cb7fcc4..0f96812 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2036,6 +2036,7 @@ config CPU_MIPSR6
 	select CPU_HAS_RIXI
 	select HAVE_ARCH_BITREVERSE
 	select MIPS_ASID_BITS_VARIABLE
+	select MIPS_CRC_SUPPORT
 	select MIPS_SPRAM
 
 config EVA
@@ -2503,6 +2504,9 @@ config MIPS_ASID_BITS
 config MIPS_ASID_BITS_VARIABLE
 	bool
 
+config MIPS_CRC_SUPPORT
+	bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index a96d97a..aa77536 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -216,6 +216,8 @@ cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
 toolchain-virt				:= $(call cc-option-yn,$(mips-cflags) -mvirt)
 cflags-$(toolchain-virt)		+= -DTOOLCHAIN_SUPPORTS_VIRT
+toolchain-crc				:= $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc)			+= -DTOOLCHAIN_SUPPORTS_CRC
 
 #
 # Firmware support
@@ -324,6 +326,7 @@ libs-y			+= arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
 
 # suspend and hibernation support
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644
index 0000000..665c725
--- /dev/null
+++ b/arch/mips/crypto/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for MIPS crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644
index 0000000..f2f5db0
--- /dev/null
+++ b/arch/mips/crypto/crc32-mips.c
@@ -0,0 +1,364 @@
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+	b, h, w, d,
+};
+
+enum crc_type {
+	crc32,
+	crc32c,
+};
+
+#ifdef TOOLCHAIN_SUPPORTS_CRC
+
+#define _CRC32(crc, value, size, type)		\
+do {						\
+	__asm__ __volatile__(			\
+	".set	push\n\t"			\
+	".set	crc\n\t"			\
+	#type #size "	%0, %1, %0\n\t"		\
+	".set	pop"			\
+	: "+r" (crc)				\
+	: "r" (value)				\
+);						\
+} while(0)
+
+#define CRC_REGISTER
+
+#else	/* TOOLCHAIN_SUPPORTS_CRC */
+/*
+ * Crc argument is currently ignored and the assembly below assumes
+ * the crc is stored in $2. As the register number is encoded in the
+ * instruction we can't let the compiler chose the register it wants.
+ * An alternative is to change the code to do
+ * move $2, %0
+ * crc32
+ * move %0, $2
+ * but that adds unnecessary operations that the crc32 operation is
+ * designed to avoid. This issue can go away once the assembler
+ * is extended to support this operation and the compiler can make
+ * the right register choice automatically
+ */
+
+#define _CRC32(crc, value, size, type)						\
+do {										\
+	__asm__ __volatile__(							\
+	".set	push\n\t"							\
+	".set	noat\n\t"							\
+	"move	$at, %1\n\t"							\
+	"# " #type #size "	%0, $at, %0\n\t"				\
+	_ASM_INSN_IF_MIPS(0x7c00000f | (2 << 16) | (1 << 21) | (%2 << 6) | (%3 << 8))	\
+	_ASM_INSN32_IF_MM(0x00000030 | (1 << 16) | (2 << 21) | (%2 << 14) | (%3 << 3))	\
+	".set	pop"							\
+	: "+r" (crc)								\
+	: "r" (value), "i" (size), "i" (type)					\
+);										\
+} while(0)
+
+#define CRC_REGISTER __asm__("$2")
+#endif	/* !TOOLCHAIN_SUPPORTS_CRC */
+
+#define CRC32(crc, value, size) \
+	_CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+	_CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	register u32 crc CRC_REGISTER = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		register u64 value = get_unaligned_le64(p);
+
+		CRC32(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		register u32 value = get_unaligned_le32(p);
+
+		CRC32(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		register u16 value = get_unaligned_le16(p);
+
+		CRC32(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		register u8 value = *p++;
+
+		CRC32(crc, value, b);
+	}
+
+	return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	register u32 crc CRC_REGISTER = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		register u64 value = get_unaligned_le64(p);
+
+		CRC32C(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		register u32 value = get_unaligned_le32(p);
+
+		CRC32C(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		register u16 value = get_unaligned_le16(p);
+
+		CRC32C(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		register u8 value = *p++;
+
+		CRC32C(crc, value, b);
+	}
+	return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+struct chksum_ctx {
+	u32 key;
+};
+
+struct chksum_desc_ctx {
+	u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (keylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = get_unaligned_le32(key);
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~ctx->crc, out);
+	return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static struct shash_alg crc32_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksum_update,
+	.final			=	chksum_final,
+	.finup			=	chksum_finup,
+	.digest			=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32",
+		.cra_driver_name	=	"crc32-mips-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static struct shash_alg crc32c_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksumc_update,
+	.final			=	chksumc_final,
+	.finup			=	chksumc_finup,
+	.digest			=	chksumc_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-mips-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shash(&crc32_alg);
+
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&crc32c_alg);
+
+	if (err) {
+		crypto_unregister_shash(&crc32_alg);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+	crypto_unregister_shash(&crc32_alg);
+	crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@imgtec.com");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 28b1a0d..661971a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -494,6 +494,15 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
+config CRYPTO_CRC32_MIPS
+	tristate "CRC32c and CRC32 CRC algorithm (MIPS)"
+	depends on MIPS_CRC_SUPPORT
+	select CRYPTO_HASH
+	help
+	  CRC32c and CRC32 CRC algorithms implemented using mips crypto
+	  instructions, when available.
+
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/2] MIPS: crypto: Add crc32 and crc32c hw accelerated module
@ 2017-10-04 10:48   ` Marcin Nowakowski
  0 siblings, 0 replies; 6+ messages in thread
From: Marcin Nowakowski @ 2017-10-04 10:48 UTC (permalink / raw)
  To: Linux MIPS Mailing List
  Cc: Ralf Baechle, Marcin Nowakowski, linux-crypto, Herbert Xu,
	David S. Miller

This module registers crc32 and crc32c algorithms that use the
optional CRC32[bhwd] and CRC32C[bhwd] instructions in MIPSr6 cores.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-crypto@vger.kernel.org
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>

---
v2:
 - minor code refactoring as suggested by JamesH which produces
   a better assembly output for 32-bit builds
---
 arch/mips/Kconfig             |   4 +
 arch/mips/Makefile            |   3 +
 arch/mips/crypto/Makefile     |   5 +
 arch/mips/crypto/crc32-mips.c | 364 ++++++++++++++++++++++++++++++++++++++++++
 crypto/Kconfig                |   9 ++
 5 files changed, 385 insertions(+)
 create mode 100644 arch/mips/crypto/Makefile
 create mode 100644 arch/mips/crypto/crc32-mips.c

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cb7fcc4..0f96812 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2036,6 +2036,7 @@ config CPU_MIPSR6
 	select CPU_HAS_RIXI
 	select HAVE_ARCH_BITREVERSE
 	select MIPS_ASID_BITS_VARIABLE
+	select MIPS_CRC_SUPPORT
 	select MIPS_SPRAM
 
 config EVA
@@ -2503,6 +2504,9 @@ config MIPS_ASID_BITS
 config MIPS_ASID_BITS_VARIABLE
 	bool
 
+config MIPS_CRC_SUPPORT
+	bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index a96d97a..aa77536 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -216,6 +216,8 @@ cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
 toolchain-virt				:= $(call cc-option-yn,$(mips-cflags) -mvirt)
 cflags-$(toolchain-virt)		+= -DTOOLCHAIN_SUPPORTS_VIRT
+toolchain-crc				:= $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc)			+= -DTOOLCHAIN_SUPPORTS_CRC
 
 #
 # Firmware support
@@ -324,6 +326,7 @@ libs-y			+= arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
 
 # suspend and hibernation support
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644
index 0000000..665c725
--- /dev/null
+++ b/arch/mips/crypto/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for MIPS crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644
index 0000000..f2f5db0
--- /dev/null
+++ b/arch/mips/crypto/crc32-mips.c
@@ -0,0 +1,364 @@
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+	b, h, w, d,
+};
+
+enum crc_type {
+	crc32,
+	crc32c,
+};
+
+#ifdef TOOLCHAIN_SUPPORTS_CRC
+
+#define _CRC32(crc, value, size, type)		\
+do {						\
+	__asm__ __volatile__(			\
+	".set	push\n\t"			\
+	".set	crc\n\t"			\
+	#type #size "	%0, %1, %0\n\t"		\
+	".set	pop"			\
+	: "+r" (crc)				\
+	: "r" (value)				\
+);						\
+} while(0)
+
+#define CRC_REGISTER
+
+#else	/* TOOLCHAIN_SUPPORTS_CRC */
+/*
+ * Crc argument is currently ignored and the assembly below assumes
+ * the crc is stored in $2. As the register number is encoded in the
+ * instruction we can't let the compiler chose the register it wants.
+ * An alternative is to change the code to do
+ * move $2, %0
+ * crc32
+ * move %0, $2
+ * but that adds unnecessary operations that the crc32 operation is
+ * designed to avoid. This issue can go away once the assembler
+ * is extended to support this operation and the compiler can make
+ * the right register choice automatically
+ */
+
+#define _CRC32(crc, value, size, type)						\
+do {										\
+	__asm__ __volatile__(							\
+	".set	push\n\t"							\
+	".set	noat\n\t"							\
+	"move	$at, %1\n\t"							\
+	"# " #type #size "	%0, $at, %0\n\t"				\
+	_ASM_INSN_IF_MIPS(0x7c00000f | (2 << 16) | (1 << 21) | (%2 << 6) | (%3 << 8))	\
+	_ASM_INSN32_IF_MM(0x00000030 | (1 << 16) | (2 << 21) | (%2 << 14) | (%3 << 3))	\
+	".set	pop"							\
+	: "+r" (crc)								\
+	: "r" (value), "i" (size), "i" (type)					\
+);										\
+} while(0)
+
+#define CRC_REGISTER __asm__("$2")
+#endif	/* !TOOLCHAIN_SUPPORTS_CRC */
+
+#define CRC32(crc, value, size) \
+	_CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+	_CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	register u32 crc CRC_REGISTER = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		register u64 value = get_unaligned_le64(p);
+
+		CRC32(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		register u32 value = get_unaligned_le32(p);
+
+		CRC32(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		register u16 value = get_unaligned_le16(p);
+
+		CRC32(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		register u8 value = *p++;
+
+		CRC32(crc, value, b);
+	}
+
+	return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	register u32 crc CRC_REGISTER = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		register u64 value = get_unaligned_le64(p);
+
+		CRC32C(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		register u32 value = get_unaligned_le32(p);
+
+		CRC32C(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		register u16 value = get_unaligned_le16(p);
+
+		CRC32C(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		register u8 value = *p++;
+
+		CRC32C(crc, value, b);
+	}
+	return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+struct chksum_ctx {
+	u32 key;
+};
+
+struct chksum_desc_ctx {
+	u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (keylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = get_unaligned_le32(key);
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~ctx->crc, out);
+	return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static struct shash_alg crc32_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksum_update,
+	.final			=	chksum_final,
+	.finup			=	chksum_finup,
+	.digest			=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32",
+		.cra_driver_name	=	"crc32-mips-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static struct shash_alg crc32c_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksumc_update,
+	.final			=	chksumc_final,
+	.finup			=	chksumc_finup,
+	.digest			=	chksumc_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-mips-hw",
+		.cra_priority		=	300,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shash(&crc32_alg);
+
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&crc32c_alg);
+
+	if (err) {
+		crypto_unregister_shash(&crc32_alg);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+	crypto_unregister_shash(&crc32_alg);
+	crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@imgtec.com");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 28b1a0d..661971a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -494,6 +494,15 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
+config CRYPTO_CRC32_MIPS
+	tristate "CRC32c and CRC32 CRC algorithm (MIPS)"
+	depends on MIPS_CRC_SUPPORT
+	select CRYPTO_HASH
+	help
+	  CRC32c and CRC32 CRC algorithms implemented using mips crypto
+	  instructions, when available.
+
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] MIPS: crypto: Add crc32 and crc32c hw accelerated module
  2017-10-04 10:48   ` Marcin Nowakowski
  (?)
@ 2017-10-04 15:36   ` Ralf Baechle
  2017-11-16  0:04     ` James Hogan
  -1 siblings, 1 reply; 6+ messages in thread
From: Ralf Baechle @ 2017-10-04 15:36 UTC (permalink / raw)
  To: Marcin Nowakowski
  Cc: Linux MIPS Mailing List, linux-crypto, Herbert Xu, David S. Miller

On Wed, Oct 04, 2017 at 12:48:53PM +0200, Marcin Nowakowski wrote:

>  arch/mips/Kconfig             |   4 +
>  arch/mips/Makefile            |   3 +
>  arch/mips/crypto/Makefile     |   5 +
>  arch/mips/crypto/crc32-mips.c | 364 ++++++++++++++++++++++++++++++++++++++++++
>  crypto/Kconfig                |   9 ++
>  5 files changed, 385 insertions(+)
>  create mode 100644 arch/mips/crypto/Makefile
>  create mode 100644 arch/mips/crypto/crc32-mips.c
> 
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index cb7fcc4..0f96812 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2036,6 +2036,7 @@ config CPU_MIPSR6
>  	select CPU_HAS_RIXI
>  	select HAVE_ARCH_BITREVERSE
>  	select MIPS_ASID_BITS_VARIABLE
> +	select MIPS_CRC_SUPPORT
>  	select MIPS_SPRAM
>  
>  config EVA
> @@ -2503,6 +2504,9 @@ config MIPS_ASID_BITS
>  config MIPS_ASID_BITS_VARIABLE
>  	bool
>  
> +config MIPS_CRC_SUPPORT
> +	bool
> +
>  #
>  # - Highmem only makes sense for the 32-bit kernel.
>  # - The current highmem code will only work properly on physically indexed
> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
> index a96d97a..aa77536 100644
> --- a/arch/mips/Makefile
> +++ b/arch/mips/Makefile
> @@ -216,6 +216,8 @@ cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
>  endif
>  toolchain-virt				:= $(call cc-option-yn,$(mips-cflags) -mvirt)
>  cflags-$(toolchain-virt)		+= -DTOOLCHAIN_SUPPORTS_VIRT
> +toolchain-crc				:= $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
> +cflags-$(toolchain-crc)			+= -DTOOLCHAIN_SUPPORTS_CRC
>  
>  #
>  # Firmware support
> @@ -324,6 +326,7 @@ libs-y			+= arch/mips/math-emu/
>  # See arch/mips/Kbuild for content of core part of the kernel
>  core-y += arch/mips/
>  
> +drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
>  drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
>  
>  # suspend and hibernation support
> diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
> new file mode 100644
> index 0000000..665c725
> --- /dev/null
> +++ b/arch/mips/crypto/Makefile
> @@ -0,0 +1,5 @@
> +#
> +# Makefile for MIPS crypto files..
> +#
> +
> +obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
> diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
> new file mode 100644
> index 0000000..f2f5db0
> --- /dev/null
> +++ b/arch/mips/crypto/crc32-mips.c
> @@ -0,0 +1,364 @@
> +/*
> + * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
> + *
> + * Module based on arm64/crypto/crc32-arm.c
> + *
> + * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
> + * Copyright (C) 2017 Imagination Technologies, Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/unaligned/access_ok.h>
> +#include <linux/cpufeature.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/string.h>
> +#include <asm/mipsregs.h>
> +
> +#include <crypto/internal/hash.h>
> +
> +enum crc_op_size {
> +	b, h, w, d,
> +};
> +
> +enum crc_type {
> +	crc32,
> +	crc32c,
> +};
> +
> +#ifdef TOOLCHAIN_SUPPORTS_CRC
> +
> +#define _CRC32(crc, value, size, type)		\
> +do {						\
> +	__asm__ __volatile__(			\
> +	".set	push\n\t"			\
> +	".set	crc\n\t"			\
> +	#type #size "	%0, %1, %0\n\t"		\
> +	".set	pop"			\
> +	: "+r" (crc)				\
> +	: "r" (value)				\
> +);						\
> +} while(0)
> +
> +#define CRC_REGISTER
> +
> +#else	/* TOOLCHAIN_SUPPORTS_CRC */
> +/*
> + * Crc argument is currently ignored and the assembly below assumes
> + * the crc is stored in $2. As the register number is encoded in the
> + * instruction we can't let the compiler chose the register it wants.
> + * An alternative is to change the code to do
> + * move $2, %0
> + * crc32
> + * move %0, $2
> + * but that adds unnecessary operations that the crc32 operation is
> + * designed to avoid. This issue can go away once the assembler
> + * is extended to support this operation and the compiler can make
> + * the right register choice automatically
> + */
> +
> +#define _CRC32(crc, value, size, type)						\
> +do {										\
> +	__asm__ __volatile__(							\
> +	".set	push\n\t"							\
> +	".set	noat\n\t"							\
> +	"move	$at, %1\n\t"							\
> +	"# " #type #size "	%0, $at, %0\n\t"				\
> +	_ASM_INSN_IF_MIPS(0x7c00000f | (2 << 16) | (1 << 21) | (%2 << 6) | (%3 << 8))	\
> +	_ASM_INSN32_IF_MM(0x00000030 | (1 << 16) | (2 << 21) | (%2 << 14) | (%3 << 3))	\
> +	".set	pop"							\
> +	: "+r" (crc)								\
> +	: "r" (value), "i" (size), "i" (type)					\
> +);										\
> +} while(0)
> +
> +#define CRC_REGISTER __asm__("$2")
> +#endif	/* !TOOLCHAIN_SUPPORTS_CRC */

Over there years we've added so many inlines for instructions not yet
supported by gas and the simply approach always requires extra move
instructions.  So I finally cooked up something better which only relies
on things that are in gas for as long as I can remember.  This illustrates
it:

	.macro	parse var r
	.ifc	\r, $0
\var	=	0
	.endif
	.ifc	\r, $1
\var	=	1
	.endif
	.ifc	\r, $2
\var	=	2
	.endif
	.ifc	\r, $3
\var	=	3
	.endif
	.endm

	.macro	definsn opcode r1 r2 r3
	parse	__definsn_r1 \r1
	parse	__definsn_r2 \r2
	parse	__definsn_r3 \r3
	.word	\opcode | (__definsn_r1 << 16) | (__definsn_r2 << 20) | (__definsn_r3 << 24)
	.endm

	.macro	foo r1, r2, r3
	definsn	42, \r1, \r2, \r3
	.endm

	foo	$1, $2, $3
	foo	$3, $0, $1

The advantages are obvious, the hypothetical instruction foo can be used
just like it was actually supported by gas and no bloat by move
instructions to shuffle operands in and results out.  Which for some
very convoluted cases may also save you from an ICE.

Above skeleton still needs a little polish, either by extending the
parsing of register numbers to all 32 registers or by implementing that
using another loop which probably would be implemented using a recursive
gas macro.

> +
> +#define CRC32(crc, value, size) \
> +	_CRC32(crc, value, size, crc32)
> +
> +#define CRC32C(crc, value, size) \
> +	_CRC32(crc, value, size, crc32c)
> +
> +static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
> +{
> +	register u32 crc CRC_REGISTER = crc_;
> +
> +#ifdef CONFIG_64BIT
> +	while (len >= sizeof(u64)) {
> +		register u64 value = get_unaligned_le64(p);

Nice try but gcc will happily ignore the register keyword.  It only
honors register when explicitly assigning a value to a register, something
like register int foo asm("$42");.  Same for the other instances of
register below.

Cheers,

  Ralf

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] MIPS: crypto: Add crc32 and crc32c hw accelerated module
  2017-10-04 15:36   ` Ralf Baechle
@ 2017-11-16  0:04     ` James Hogan
  0 siblings, 0 replies; 6+ messages in thread
From: James Hogan @ 2017-11-16  0:04 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Marcin Nowakowski, Linux MIPS Mailing List

[-- Attachment #1: Type: text/plain, Size: 6233 bytes --]

(trim cc)

On Wed, Oct 04, 2017 at 05:36:00PM +0200, Ralf Baechle wrote:
> On Wed, Oct 04, 2017 at 12:48:53PM +0200, Marcin Nowakowski wrote:
> > +#define _CRC32(crc, value, size, type)						\
> > +do {										\
> > +	__asm__ __volatile__(							\
> > +	".set	push\n\t"							\
> > +	".set	noat\n\t"							\
> > +	"move	$at, %1\n\t"							\
> > +	"# " #type #size "	%0, $at, %0\n\t"				\
> > +	_ASM_INSN_IF_MIPS(0x7c00000f | (2 << 16) | (1 << 21) | (%2 << 6) | (%3 << 8))	\
> > +	_ASM_INSN32_IF_MM(0x00000030 | (1 << 16) | (2 << 21) | (%2 << 14) | (%3 << 3))	\
> > +	".set	pop"							\
> > +	: "+r" (crc)								\
> > +	: "r" (value), "i" (size), "i" (type)					\
> > +);										\
> > +} while(0)
> > +
> > +#define CRC_REGISTER __asm__("$2")
> > +#endif	/* !TOOLCHAIN_SUPPORTS_CRC */
> 
> Over there years we've added so many inlines for instructions not yet
> supported by gas and the simply approach always requires extra move
> instructions.  So I finally cooked up something better which only relies
> on things that are in gas for as long as I can remember.  This illustrates
> it:
> 
> 	.macro	parse var r
> 	.ifc	\r, $0
> \var	=	0
> 	.endif
> 	.ifc	\r, $1
> \var	=	1
> 	.endif
> 	.ifc	\r, $2
> \var	=	2
> 	.endif
> 	.ifc	\r, $3
> \var	=	3
> 	.endif
> 	.endm
> 
> 	.macro	definsn opcode r1 r2 r3
> 	parse	__definsn_r1 \r1
> 	parse	__definsn_r2 \r2
> 	parse	__definsn_r3 \r3
> 	.word	\opcode | (__definsn_r1 << 16) | (__definsn_r2 << 20) | (__definsn_r3 << 24)
> 	.endm
> 
> 	.macro	foo r1, r2, r3
> 	definsn	42, \r1, \r2, \r3
> 	.endm
> 
> 	foo	$1, $2, $3
> 	foo	$3, $0, $1
> 
> The advantages are obvious, the hypothetical instruction foo can be used
> just like it was actually supported by gas and no bloat by move
> instructions to shuffle operands in and results out.  Which for some
> very convoluted cases may also save you from an ICE.
> 
> Above skeleton still needs a little polish, either by extending the
> parsing of register numbers to all 32 registers or by implementing that
> using another loop which probably would be implemented using a recursive
> gas macro.

Very nice, e.g.

diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 6b1f1ad0542c..3a16fad43efb 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -1180,6 +1180,31 @@ static inline int mm_insn_16bit(u16 insn)
 #define _ASM_INSN_IF_MIPS(_enc)
 #endif
 
+/*
+ * Helper assembly macro for decoding a register name.
+ */
+#define _IFC_REG(n)			\
+	".ifc	\\r, $"#n"\n\t"		\
+	"\\var	="#n"\n\t"		\
+	".endif\n\t"
+
+__asm__(".macro	parse var r\n\t"
+	"\\var	= -1\n\t"
+	_IFC_REG( 0) _IFC_REG( 1) _IFC_REG( 2) _IFC_REG( 3)
+	_IFC_REG( 4) _IFC_REG( 5) _IFC_REG( 6) _IFC_REG( 7)
+	_IFC_REG( 8) _IFC_REG( 9) _IFC_REG(10) _IFC_REG(11)
+	_IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
+	_IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
+	_IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
+	_IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
+	_IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
+	".iflt	\\var\n\t"
+	".error \"Unable to parse register name \\r\"\n\t"
+	".endif\n\t"
+	".endm");
+
+#undef _IFC_REG
+
 /*
  * TLB Invalidate Flush
  */
@@ -1412,10 +1437,10 @@ do {									\
 	"	.set	push					\n"	\
 	"	.set	noat					\n"	\
 	"	.set	mips32r2				\n"	\
-	"	# mfhc0 $1, %1					\n"	\
-	_ASM_INSN_IF_MIPS(0x40410000 | ((%1 & 0x1f) << 11))		\
-	_ASM_INSN32_IF_MM(0x002000f4 | ((%1 & 0x1f) << 16))		\
-	"	move	%0, $1					\n"	\
+	"	# mfhc0 %0, %1					\n"	\
+	"	parse	__dst, %0				\n"	\
+	_ASM_INSN_IF_MIPS(0x40400000 | (__dst << 16) | ((%1 & 0x1f) << 11)) \
+	_ASM_INSN32_IF_MM(0x000000f4 | (__dst << 21) | ((%1 & 0x1f) << 16)) \
 	"	.set	pop					\n"	\
 	: "=r" (__res)							\
 	: "i" (source));						\

gives us this in arch/mips/lib/dump_tlb.o:

- 430:	40411000 	mfhc0	at,c0_entrylo0
- 434:	00203825 	move	a3,at
+ 430:	40471000 	mfhc0	a3,c0_entrylo0

And we can easily enough simplify a lot of mostly duplicated code by
doing something like this (can't do it with mfhc0 as we don't yet have a
case that uses the assembler):

@@ -1855,14 +1855,25 @@ do {									\
  * Macros to access the guest system control coprocessor
  */
 
-#ifdef TOOLCHAIN_SUPPORTS_VIRT
+#ifndef TOOLCHAIN_SUPPORTS_VIRT
+__asm__(".macro	mfgc0 rt, rs, sel\n\t"
+	"parse __rt, \\rt\n\t"
+	"parse __rs, \\rs\n\t"
+	"# mfgc0\t\\rt, \\rs, %2\n\t"
+	_ASM_INSN_IF_MIPS(0x40600000 | __rt << 16 | __rs << 11 | \\sel)
+	_ASM_INSN32_IF_MM(0x000004fc | __rt << 21 | __rs << 16 | \\sel << 11)
+	".endm");
+#define _ASM_SET_VIRT ""
+#else
+#define _ASM_SET_VIRT ".set\tvirt\n\t"
+#endif
 
 #define __read_32bit_gc0_register(source, sel)				\
 ({ int __res;								\
 	__asm__ __volatile__(						\
 		".set\tpush\n\t"					\
 		".set\tmips32r2\n\t"					\
-		".set\tvirt\n\t"					\
+		_ASM_SET_VIRT						\
 		"mfgc0\t%0, $%1, %2\n\t"				\
 		".set\tpop"						\
 		: "=r" (__res)						\
@@ -1870,6 +1881,8 @@ do {									\
 	__res;								\
 })
 
+#ifdef TOOLCHAIN_SUPPORTS_VIRT
+
 #define __read_64bit_gc0_register(source, sel)				\
 ({ unsigned long long __res;						\
 	__asm__ __volatile__(						\
@@ -1909,21 +1922,6 @@ do {									\
 
 #else	/* TOOLCHAIN_SUPPORTS_VIRT */
 
-#define __read_32bit_gc0_register(source, sel)				\
-({ int __res;								\
-	__asm__ __volatile__(						\
-		".set\tpush\n\t"					\
-		".set\tnoat\n\t"					\
-		"# mfgc0\t$1, $%1, %2\n\t"				\
-		_ASM_INSN_IF_MIPS(0x40610000 | %1 << 11 | %2)		\
-		_ASM_INSN32_IF_MM(0x002004fc | %1 << 16 | %2 << 11)	\
-		"move\t%0, $1\n\t"					\
-		".set\tpop"						\
-		: "=r" (__res)						\
-		: "i" (source), "i" (sel));				\
-	__res;								\
-})
-
 #define __read_64bit_gc0_register(source, sel)				\
 ({ unsigned long long __res;						\
 	__asm__ __volatile__(						\

A worthwhile cleanup IMO to fix all cases like this to use that parsing
macro, though it'll take a bit of care.

The use of __asm__ in global scope to define the macros to be accessible
from inline asm is slightly hacky.

Any comments / objections?

Cheers
James

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-11-16  0:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-04 10:48 [PATCH v2 0/2] MIPS CRC instruction support Marcin Nowakowski
2017-10-04 10:48 ` [PATCH v2 1/2] MIPS: add crc instruction support flag to elf_hwcap Marcin Nowakowski
2017-10-04 10:48 ` [PATCH v2 2/2] MIPS: crypto: Add crc32 and crc32c hw accelerated module Marcin Nowakowski
2017-10-04 10:48   ` Marcin Nowakowski
2017-10-04 15:36   ` Ralf Baechle
2017-11-16  0:04     ` James Hogan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.