Linux-Crypto Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 0/5] STM32 CRC update
@ 2020-05-12 14:11 Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 1/5] crypto: stm32/crc: fix ext4 chksum BUG_ON() Nicolas Toromanoff
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

This set of patches update the STM32 CRC driver.
It contains bug fix.

First fixes issue if we enable STM32 CRC32 hardware accelerator with
ext4 (with metadata-chksum enable) and other fs that use same direct
access to update crc32 API without previous init.
Second fixes some issues raise by the extra self-test.
Third fixes wrong hw usage if there is multiple IP on the SOC.
Forth fixes "sleep while atomic" in tcrypt test, and some other places
(ext4)
Last fixes concurrent accesses. As state is saved in the hardware cell
and not in stack as other CRC32 drivers, we need to create atomic
section to protect concurrent CRC32 calculus.

This patch series applies to cryptodev/master.

Nicolas Toromanoff (5):
  crypto: stm32/crc: fix ext4 chksum BUG_ON()
  crypto: stm32/crc: fix run-time self test issue.
  crypto: stm32/crc: fix multi-instance
  crypto: stm32/crc: don't sleep in runtime pm
  crypto: stm32/crc: protect from concurrent accesses

 drivers/crypto/stm32/stm32-crc32.c | 230 ++++++++++++++++++++---------
 1 file changed, 161 insertions(+), 69 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/5] crypto: stm32/crc: fix ext4 chksum BUG_ON()
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
@ 2020-05-12 14:11 ` Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 2/5] crypto: stm32/crc: fix run-time self test issue Nicolas Toromanoff
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

Allow use of crc_update without prior call to crc_init.
And change (and fix) driver to use CRC device even on unaligned buffers.

Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module")

Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 98 +++++++++++++++---------------
 1 file changed, 48 insertions(+), 50 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 8e92e4ac79f1..c6156bf6c603 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -28,8 +28,10 @@
 
 /* Registers values */
 #define CRC_CR_RESET            BIT(0)
-#define CRC_CR_REVERSE          (BIT(7) | BIT(6) | BIT(5))
 #define CRC_INIT_DEFAULT        0xFFFFFFFF
+#define CRC_CR_REV_IN_WORD      (BIT(6) | BIT(5))
+#define CRC_CR_REV_IN_BYTE      BIT(5)
+#define CRC_CR_REV_OUT          BIT(7)
 
 #define CRC_AUTOSUSPEND_DELAY	50
 
@@ -38,8 +40,6 @@ struct stm32_crc {
 	struct device    *dev;
 	void __iomem     *regs;
 	struct clk       *clk;
-	u8               pending_data[sizeof(u32)];
-	size_t           nb_pending_bytes;
 };
 
 struct stm32_crc_list {
@@ -59,7 +59,6 @@ struct stm32_crc_ctx {
 
 struct stm32_crc_desc_ctx {
 	u32    partial; /* crc32c: partial in first 4 bytes of that struct */
-	struct stm32_crc *crc;
 };
 
 static int stm32_crc32_cra_init(struct crypto_tfm *tfm)
@@ -99,25 +98,22 @@ static int stm32_crc_init(struct shash_desc *desc)
 	struct stm32_crc *crc;
 
 	spin_lock_bh(&crc_list.lock);
-	list_for_each_entry(crc, &crc_list.dev_list, list) {
-		ctx->crc = crc;
-		break;
-	}
+	crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list);
 	spin_unlock_bh(&crc_list.lock);
 
-	pm_runtime_get_sync(ctx->crc->dev);
+	pm_runtime_get_sync(crc->dev);
 
 	/* Reset, set key, poly and configure in bit reverse mode */
-	writel_relaxed(bitrev32(mctx->key), ctx->crc->regs + CRC_INIT);
-	writel_relaxed(bitrev32(mctx->poly), ctx->crc->regs + CRC_POL);
-	writel_relaxed(CRC_CR_RESET | CRC_CR_REVERSE, ctx->crc->regs + CRC_CR);
+	writel_relaxed(bitrev32(mctx->key), crc->regs + CRC_INIT);
+	writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
+	writel_relaxed(CRC_CR_RESET | CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+		       crc->regs + CRC_CR);
 
 	/* Store partial result */
-	ctx->partial = readl_relaxed(ctx->crc->regs + CRC_DR);
-	ctx->crc->nb_pending_bytes = 0;
+	ctx->partial = readl_relaxed(crc->regs + CRC_DR);
 
-	pm_runtime_mark_last_busy(ctx->crc->dev);
-	pm_runtime_put_autosuspend(ctx->crc->dev);
+	pm_runtime_mark_last_busy(crc->dev);
+	pm_runtime_put_autosuspend(crc->dev);
 
 	return 0;
 }
@@ -126,31 +122,49 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 			    unsigned int length)
 {
 	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
-	struct stm32_crc *crc = ctx->crc;
-	u32 *d32;
-	unsigned int i;
+	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct stm32_crc *crc;
+
+	spin_lock_bh(&crc_list.lock);
+	crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list);
+	spin_unlock_bh(&crc_list.lock);
 
 	pm_runtime_get_sync(crc->dev);
 
-	if (unlikely(crc->nb_pending_bytes)) {
-		while (crc->nb_pending_bytes != sizeof(u32) && length) {
-			/* Fill in pending data */
-			crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
+	/*
+	 * Restore previously calculated CRC for this context as init value
+	 * Restore polynomial configuration
+	 * Configure in register for word input data,
+	 * Configure out register in reversed bit mode data.
+	 */
+	writel_relaxed(bitrev32(ctx->partial), crc->regs + CRC_INIT);
+	writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
+	writel_relaxed(CRC_CR_RESET | CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+		       crc->regs + CRC_CR);
+
+	if (d8 != PTR_ALIGN(d8, sizeof(u32))) {
+		/* Configure for byte data */
+		writel_relaxed(CRC_CR_REV_IN_BYTE | CRC_CR_REV_OUT,
+			       crc->regs + CRC_CR);
+		while (d8 != PTR_ALIGN(d8, sizeof(u32)) && length) {
+			writeb_relaxed(*d8++, crc->regs + CRC_DR);
 			length--;
 		}
-
-		if (crc->nb_pending_bytes == sizeof(u32)) {
-			/* Process completed pending data */
-			writel_relaxed(*(u32 *)crc->pending_data,
-				       crc->regs + CRC_DR);
-			crc->nb_pending_bytes = 0;
-		}
+		/* Configure for word data */
+		writel_relaxed(CRC_CR_REV_IN_WORD | CRC_CR_REV_OUT,
+			       crc->regs + CRC_CR);
 	}
 
-	d32 = (u32 *)d8;
-	for (i = 0; i < length >> 2; i++)
-		/* Process 32 bits data */
-		writel_relaxed(*(d32++), crc->regs + CRC_DR);
+	for (; length >= sizeof(u32); d8 += sizeof(u32), length -= sizeof(u32))
+		writel_relaxed(*((u32 *)d8), crc->regs + CRC_DR);
+
+	if (length) {
+		/* Configure for byte data */
+		writel_relaxed(CRC_CR_REV_IN_BYTE | CRC_CR_REV_OUT,
+			       crc->regs + CRC_CR);
+		while (length--)
+			writeb_relaxed(*d8++, crc->regs + CRC_DR);
+	}
 
 	/* Store partial result */
 	ctx->partial = readl_relaxed(crc->regs + CRC_DR);
@@ -158,22 +172,6 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 	pm_runtime_mark_last_busy(crc->dev);
 	pm_runtime_put_autosuspend(crc->dev);
 
-	/* Check for pending data (non 32 bits) */
-	length &= 3;
-	if (likely(!length))
-		return 0;
-
-	if ((crc->nb_pending_bytes + length) >= sizeof(u32)) {
-		/* Shall not happen */
-		dev_err(crc->dev, "Pending data overflow\n");
-		return -EINVAL;
-	}
-
-	d8 = (const u8 *)d32;
-	for (i = 0; i < length; i++)
-		/* Store pending data */
-		crc->pending_data[crc->nb_pending_bytes++] = *(d8++);
-
 	return 0;
 }
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/5] crypto: stm32/crc: fix run-time self test issue.
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 1/5] crypto: stm32/crc: fix ext4 chksum BUG_ON() Nicolas Toromanoff
@ 2020-05-12 14:11 ` Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 3/5] crypto: stm32/crc: fix multi-instance Nicolas Toromanoff
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

Fix wrong crc32 initialisation value:
"alg: shash: stm32_crc32 test failed (wrong result) on test vector 0,
cfg="init+update+final aligned buffer"
cra_name="crc32c" expects an init value of 0XFFFFFFFF,
cra_name="crc32" expects an init value of 0.

Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module")

Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index c6156bf6c603..1c3e411b7acb 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -28,10 +28,10 @@
 
 /* Registers values */
 #define CRC_CR_RESET            BIT(0)
-#define CRC_INIT_DEFAULT        0xFFFFFFFF
 #define CRC_CR_REV_IN_WORD      (BIT(6) | BIT(5))
 #define CRC_CR_REV_IN_BYTE      BIT(5)
 #define CRC_CR_REV_OUT          BIT(7)
+#define CRC32C_INIT_DEFAULT     0xFFFFFFFF
 
 #define CRC_AUTOSUSPEND_DELAY	50
 
@@ -65,7 +65,7 @@ static int stm32_crc32_cra_init(struct crypto_tfm *tfm)
 {
 	struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
 
-	mctx->key = CRC_INIT_DEFAULT;
+	mctx->key = 0;
 	mctx->poly = CRC32_POLY_LE;
 	return 0;
 }
@@ -74,7 +74,7 @@ static int stm32_crc32c_cra_init(struct crypto_tfm *tfm)
 {
 	struct stm32_crc_ctx *mctx = crypto_tfm_ctx(tfm);
 
-	mctx->key = CRC_INIT_DEFAULT;
+	mctx->key = CRC32C_INIT_DEFAULT;
 	mctx->poly = CRC32C_POLY_LE;
 	return 0;
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 3/5] crypto: stm32/crc: fix multi-instance
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 1/5] crypto: stm32/crc: fix ext4 chksum BUG_ON() Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 2/5] crypto: stm32/crc: fix run-time self test issue Nicolas Toromanoff
@ 2020-05-12 14:11 ` Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 4/5] crypto: stm32/crc: don't sleep in runtime pm Nicolas Toromanoff
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

Ensure CRC algorithm is registered only once in crypto framework when
there are several instances of CRC devices.

Update the CRC device list management to avoid that only the first CRC
instance is used.

Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module")

Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 48 ++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 1c3e411b7acb..10304511f9b4 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -91,16 +91,29 @@ static int stm32_crc_setkey(struct crypto_shash *tfm, const u8 *key,
 	return 0;
 }
 
-static int stm32_crc_init(struct shash_desc *desc)
+static struct stm32_crc *stm32_crc_get_next_crc(void)
 {
-	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
-	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
 	struct stm32_crc *crc;
 
 	spin_lock_bh(&crc_list.lock);
 	crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list);
+	if (crc)
+		list_move_tail(&crc->list, &crc_list.dev_list);
 	spin_unlock_bh(&crc_list.lock);
 
+	return crc;
+}
+
+static int stm32_crc_init(struct shash_desc *desc)
+{
+	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
+	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct stm32_crc *crc;
+
+	crc = stm32_crc_get_next_crc();
+	if (!crc)
+		return -ENODEV;
+
 	pm_runtime_get_sync(crc->dev);
 
 	/* Reset, set key, poly and configure in bit reverse mode */
@@ -125,9 +138,9 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
 	struct stm32_crc *crc;
 
-	spin_lock_bh(&crc_list.lock);
-	crc = list_first_entry(&crc_list.dev_list, struct stm32_crc, list);
-	spin_unlock_bh(&crc_list.lock);
+	crc = stm32_crc_get_next_crc();
+	if (!crc)
+		return -ENODEV;
 
 	pm_runtime_get_sync(crc->dev);
 
@@ -200,6 +213,8 @@ static int stm32_crc_digest(struct shash_desc *desc, const u8 *data,
 	return stm32_crc_init(desc) ?: stm32_crc_finup(desc, data, length, out);
 }
 
+static unsigned int refcnt;
+static DEFINE_MUTEX(refcnt_lock);
 static struct shash_alg algs[] = {
 	/* CRC-32 */
 	{
@@ -290,12 +305,18 @@ static int stm32_crc_probe(struct platform_device *pdev)
 	list_add(&crc->list, &crc_list.dev_list);
 	spin_unlock(&crc_list.lock);
 
-	ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
-	if (ret) {
-		dev_err(dev, "Failed to register\n");
-		clk_disable_unprepare(crc->clk);
-		return ret;
+	mutex_lock(&refcnt_lock);
+	if (!refcnt) {
+		ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+		if (ret) {
+			mutex_unlock(&refcnt_lock);
+			dev_err(dev, "Failed to register\n");
+			clk_disable_unprepare(crc->clk);
+			return ret;
+		}
 	}
+	refcnt++;
+	mutex_unlock(&refcnt_lock);
 
 	dev_info(dev, "Initialized\n");
 
@@ -316,7 +337,10 @@ static int stm32_crc_remove(struct platform_device *pdev)
 	list_del(&crc->list);
 	spin_unlock(&crc_list.lock);
 
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	mutex_lock(&refcnt_lock);
+	if (!--refcnt)
+		crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+	mutex_unlock(&refcnt_lock);
 
 	pm_runtime_disable(crc->dev);
 	pm_runtime_put_noidle(crc->dev);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/5] crypto: stm32/crc: don't sleep in runtime pm
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
                   ` (2 preceding siblings ...)
  2020-05-12 14:11 ` [PATCH 3/5] crypto: stm32/crc: fix multi-instance Nicolas Toromanoff
@ 2020-05-12 14:11 ` Nicolas Toromanoff
  2020-05-12 14:11 ` [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses Nicolas Toromanoff
  2020-05-22 14:13 ` [PATCH 0/5] STM32 CRC update Herbert Xu
  5 siblings, 0 replies; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

Ensure stm32_crc_update() and stm32_crc_init() can be called
in atomic context and can't sleep.

Add pm_runtime_irq_safe() to make pm_runtime_get_sync() atomic.
Change runtime pm to call clk_enable()/clk_disable() and change
system pm to unprepare/prepare the clock and force runtime pm
suspend/resume.

Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 45 ++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 10304511f9b4..413415c216ef 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -297,6 +297,7 @@ static int stm32_crc_probe(struct platform_device *pdev)
 
 	pm_runtime_get_noresume(dev);
 	pm_runtime_set_active(dev);
+	pm_runtime_irq_safe(dev);
 	pm_runtime_enable(dev);
 
 	platform_set_drvdata(pdev, crc);
@@ -350,34 +351,60 @@ static int stm32_crc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int stm32_crc_runtime_suspend(struct device *dev)
+static int __maybe_unused stm32_crc_suspend(struct device *dev)
 {
 	struct stm32_crc *crc = dev_get_drvdata(dev);
+	int ret;
 
-	clk_disable_unprepare(crc->clk);
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		return ret;
+
+	clk_unprepare(crc->clk);
 
 	return 0;
 }
 
-static int stm32_crc_runtime_resume(struct device *dev)
+static int __maybe_unused stm32_crc_resume(struct device *dev)
 {
 	struct stm32_crc *crc = dev_get_drvdata(dev);
 	int ret;
 
-	ret = clk_prepare_enable(crc->clk);
+	ret = clk_prepare(crc->clk);
 	if (ret) {
-		dev_err(crc->dev, "Failed to prepare_enable clock\n");
+		dev_err(crc->dev, "Failed to prepare clock\n");
+		return ret;
+	}
+
+	return pm_runtime_force_resume(dev);
+}
+
+static int __maybe_unused stm32_crc_runtime_suspend(struct device *dev)
+{
+	struct stm32_crc *crc = dev_get_drvdata(dev);
+
+	clk_disable(crc->clk);
+
+	return 0;
+}
+
+static int __maybe_unused stm32_crc_runtime_resume(struct device *dev)
+{
+	struct stm32_crc *crc = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_enable(crc->clk);
+	if (ret) {
+		dev_err(crc->dev, "Failed to enable clock\n");
 		return ret;
 	}
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops stm32_crc_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				pm_runtime_force_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(stm32_crc_suspend,
+				stm32_crc_resume)
 	SET_RUNTIME_PM_OPS(stm32_crc_runtime_suspend,
 			   stm32_crc_runtime_resume, NULL)
 };
-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
                   ` (3 preceding siblings ...)
  2020-05-12 14:11 ` [PATCH 4/5] crypto: stm32/crc: don't sleep in runtime pm Nicolas Toromanoff
@ 2020-05-12 14:11 ` Nicolas Toromanoff
  2020-05-22 16:11   ` Ard Biesheuvel
  2020-05-22 14:13 ` [PATCH 0/5] STM32 CRC update Herbert Xu
  5 siblings, 1 reply; 14+ messages in thread
From: Nicolas Toromanoff @ 2020-05-12 14:11 UTC (permalink / raw)
  To: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue
  Cc: Nicolas Toromanoff, linux-crypto, linux-stm32, linux-arm-kernel,
	linux-kernel

Protect STM32 CRC device from concurrent accesses.

As we create a spinlocked section that increase with buffer size,
we provide a module parameter to release the pressure by splitting
critical section in chunks.

Size of each chunk is defined in burst_size module parameter.
By default burst_size=0, i.e. don't split incoming buffer.

Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 47 ++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 413415c216ef..3ba41148c2a4 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -35,11 +35,16 @@
 
 #define CRC_AUTOSUSPEND_DELAY	50
 
+static unsigned int burst_size;
+module_param(burst_size, uint, 0644);
+MODULE_PARM_DESC(burst_size, "Select burst byte size (0 unlimited)");
+
 struct stm32_crc {
 	struct list_head list;
 	struct device    *dev;
 	void __iomem     *regs;
 	struct clk       *clk;
+	spinlock_t       lock;
 };
 
 struct stm32_crc_list {
@@ -109,6 +114,7 @@ static int stm32_crc_init(struct shash_desc *desc)
 	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
 	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
 	struct stm32_crc *crc;
+	unsigned long flags;
 
 	crc = stm32_crc_get_next_crc();
 	if (!crc)
@@ -116,6 +122,8 @@ static int stm32_crc_init(struct shash_desc *desc)
 
 	pm_runtime_get_sync(crc->dev);
 
+	spin_lock_irqsave(&crc->lock, flags);
+
 	/* Reset, set key, poly and configure in bit reverse mode */
 	writel_relaxed(bitrev32(mctx->key), crc->regs + CRC_INIT);
 	writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
@@ -125,18 +133,21 @@ static int stm32_crc_init(struct shash_desc *desc)
 	/* Store partial result */
 	ctx->partial = readl_relaxed(crc->regs + CRC_DR);
 
+	spin_unlock_irqrestore(&crc->lock, flags);
+
 	pm_runtime_mark_last_busy(crc->dev);
 	pm_runtime_put_autosuspend(crc->dev);
 
 	return 0;
 }
 
-static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
-			    unsigned int length)
+static int burst_update(struct shash_desc *desc, const u8 *d8,
+			size_t length)
 {
 	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
 	struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
 	struct stm32_crc *crc;
+	unsigned long flags;
 
 	crc = stm32_crc_get_next_crc();
 	if (!crc)
@@ -144,6 +155,8 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 
 	pm_runtime_get_sync(crc->dev);
 
+	spin_lock_irqsave(&crc->lock, flags);
+
 	/*
 	 * Restore previously calculated CRC for this context as init value
 	 * Restore polynomial configuration
@@ -182,12 +195,40 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
 	/* Store partial result */
 	ctx->partial = readl_relaxed(crc->regs + CRC_DR);
 
+	spin_unlock_irqrestore(&crc->lock, flags);
+
 	pm_runtime_mark_last_busy(crc->dev);
 	pm_runtime_put_autosuspend(crc->dev);
 
 	return 0;
 }
 
+static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
+			    unsigned int length)
+{
+	const unsigned int burst_sz = burst_size;
+	unsigned int rem_sz;
+	const u8 *cur;
+	size_t size;
+	int ret;
+
+	if (!burst_sz)
+		return burst_update(desc, d8, length);
+
+	/* Digest first bytes not 32bit aligned at first pass in the loop */
+	size = min(length,
+		   burst_sz + (unsigned int)d8 - ALIGN_DOWN((unsigned int)d8,
+							    sizeof(u32)));
+	for (rem_sz = length, cur = d8; rem_sz;
+	     rem_sz -= size, cur += size, size = min(rem_sz, burst_sz)) {
+		ret = burst_update(desc, cur, size);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int stm32_crc_final(struct shash_desc *desc, u8 *out)
 {
 	struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -300,6 +341,8 @@ static int stm32_crc_probe(struct platform_device *pdev)
 	pm_runtime_irq_safe(dev);
 	pm_runtime_enable(dev);
 
+	spin_lock_init(&crc->lock);
+
 	platform_set_drvdata(pdev, crc);
 
 	spin_lock(&crc_list.lock);
-- 
2.17.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 0/5] STM32 CRC update
  2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
                   ` (4 preceding siblings ...)
  2020-05-12 14:11 ` [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses Nicolas Toromanoff
@ 2020-05-22 14:13 ` Herbert Xu
  5 siblings, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2020-05-22 14:13 UTC (permalink / raw)
  To: Nicolas Toromanoff
  Cc: David S . Miller, Maxime Coquelin, Alexandre Torgue,
	linux-crypto, linux-stm32, linux-arm-kernel, linux-kernel

On Tue, May 12, 2020 at 04:11:08PM +0200, Nicolas Toromanoff wrote:
> This set of patches update the STM32 CRC driver.
> It contains bug fix.
> 
> First fixes issue if we enable STM32 CRC32 hardware accelerator with
> ext4 (with metadata-chksum enable) and other fs that use same direct
> access to update crc32 API without previous init.
> Second fixes some issues raise by the extra self-test.
> Third fixes wrong hw usage if there is multiple IP on the SOC.
> Forth fixes "sleep while atomic" in tcrypt test, and some other places
> (ext4)
> Last fixes concurrent accesses. As state is saved in the hardware cell
> and not in stack as other CRC32 drivers, we need to create atomic
> section to protect concurrent CRC32 calculus.
> 
> This patch series applies to cryptodev/master.
> 
> Nicolas Toromanoff (5):
>   crypto: stm32/crc: fix ext4 chksum BUG_ON()
>   crypto: stm32/crc: fix run-time self test issue.
>   crypto: stm32/crc: fix multi-instance
>   crypto: stm32/crc: don't sleep in runtime pm
>   crypto: stm32/crc: protect from concurrent accesses
> 
>  drivers/crypto/stm32/stm32-crc32.c | 230 ++++++++++++++++++++---------
>  1 file changed, 161 insertions(+), 69 deletions(-)

All applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-12 14:11 ` [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses Nicolas Toromanoff
@ 2020-05-22 16:11   ` Ard Biesheuvel
  2020-05-25  7:24     ` Nicolas TOROMANOFF
  0 siblings, 1 reply; 14+ messages in thread
From: Ard Biesheuvel @ 2020-05-22 16:11 UTC (permalink / raw)
  To: Nicolas Toromanoff
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre Torgue,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

On Tue, 12 May 2020 at 16:13, Nicolas Toromanoff
<nicolas.toromanoff@st.com> wrote:
>
> Protect STM32 CRC device from concurrent accesses.
>
> As we create a spinlocked section that increase with buffer size,
> we provide a module parameter to release the pressure by splitting
> critical section in chunks.
>
> Size of each chunk is defined in burst_size module parameter.
> By default burst_size=0, i.e. don't split incoming buffer.
>
> Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>

Would you mind explaining the usage model here? It looks like you are
sharing a CRC hardware accelerator with a synchronous interface
between different users by using spinlocks? You are aware that this
will tie up the waiting CPUs completely during this time, right? So it
would be much better to use a mutex here. Or perhaps it would make
more sense to fall back to a s/w based CRC routine if the h/w is tied
up working for another task?

Using spinlocks for this is really not acceptable.



> ---
>  drivers/crypto/stm32/stm32-crc32.c | 47 ++++++++++++++++++++++++++++--
>  1 file changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
> index 413415c216ef..3ba41148c2a4 100644
> --- a/drivers/crypto/stm32/stm32-crc32.c
> +++ b/drivers/crypto/stm32/stm32-crc32.c
> @@ -35,11 +35,16 @@
>
>  #define CRC_AUTOSUSPEND_DELAY  50
>
> +static unsigned int burst_size;
> +module_param(burst_size, uint, 0644);
> +MODULE_PARM_DESC(burst_size, "Select burst byte size (0 unlimited)");
> +
>  struct stm32_crc {
>         struct list_head list;
>         struct device    *dev;
>         void __iomem     *regs;
>         struct clk       *clk;
> +       spinlock_t       lock;
>  };
>
>  struct stm32_crc_list {
> @@ -109,6 +114,7 @@ static int stm32_crc_init(struct shash_desc *desc)
>         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
>         struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
>         struct stm32_crc *crc;
> +       unsigned long flags;
>
>         crc = stm32_crc_get_next_crc();
>         if (!crc)
> @@ -116,6 +122,8 @@ static int stm32_crc_init(struct shash_desc *desc)
>
>         pm_runtime_get_sync(crc->dev);
>
> +       spin_lock_irqsave(&crc->lock, flags);
> +
>         /* Reset, set key, poly and configure in bit reverse mode */
>         writel_relaxed(bitrev32(mctx->key), crc->regs + CRC_INIT);
>         writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL);
> @@ -125,18 +133,21 @@ static int stm32_crc_init(struct shash_desc *desc)
>         /* Store partial result */
>         ctx->partial = readl_relaxed(crc->regs + CRC_DR);
>
> +       spin_unlock_irqrestore(&crc->lock, flags);
> +
>         pm_runtime_mark_last_busy(crc->dev);
>         pm_runtime_put_autosuspend(crc->dev);
>
>         return 0;
>  }
>
> -static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
> -                           unsigned int length)
> +static int burst_update(struct shash_desc *desc, const u8 *d8,
> +                       size_t length)
>  {
>         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
>         struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
>         struct stm32_crc *crc;
> +       unsigned long flags;
>
>         crc = stm32_crc_get_next_crc();
>         if (!crc)
> @@ -144,6 +155,8 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
>
>         pm_runtime_get_sync(crc->dev);
>
> +       spin_lock_irqsave(&crc->lock, flags);
> +
>         /*
>          * Restore previously calculated CRC for this context as init value
>          * Restore polynomial configuration
> @@ -182,12 +195,40 @@ static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
>         /* Store partial result */
>         ctx->partial = readl_relaxed(crc->regs + CRC_DR);
>
> +       spin_unlock_irqrestore(&crc->lock, flags);
> +
>         pm_runtime_mark_last_busy(crc->dev);
>         pm_runtime_put_autosuspend(crc->dev);
>
>         return 0;
>  }
>
> +static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
> +                           unsigned int length)
> +{
> +       const unsigned int burst_sz = burst_size;
> +       unsigned int rem_sz;
> +       const u8 *cur;
> +       size_t size;
> +       int ret;
> +
> +       if (!burst_sz)
> +               return burst_update(desc, d8, length);
> +
> +       /* Digest first bytes not 32bit aligned at first pass in the loop */
> +       size = min(length,
> +                  burst_sz + (unsigned int)d8 - ALIGN_DOWN((unsigned int)d8,
> +                                                           sizeof(u32)));
> +       for (rem_sz = length, cur = d8; rem_sz;
> +            rem_sz -= size, cur += size, size = min(rem_sz, burst_sz)) {
> +               ret = burst_update(desc, cur, size);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
>  static int stm32_crc_final(struct shash_desc *desc, u8 *out)
>  {
>         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
> @@ -300,6 +341,8 @@ static int stm32_crc_probe(struct platform_device *pdev)
>         pm_runtime_irq_safe(dev);
>         pm_runtime_enable(dev);
>
> +       spin_lock_init(&crc->lock);
> +
>         platform_set_drvdata(pdev, crc);
>
>         spin_lock(&crc_list.lock);
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-22 16:11   ` Ard Biesheuvel
@ 2020-05-25  7:24     ` Nicolas TOROMANOFF
  2020-05-25  7:46       ` Ard Biesheuvel
  0 siblings, 1 reply; 14+ messages in thread
From: Nicolas TOROMANOFF @ 2020-05-25  7:24 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre TORGUE,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

Hello,

> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Friday, May 22, 2020 6:12 PM> 
> On Tue, 12 May 2020 at 16:13, Nicolas Toromanoff
> <nicolas.toromanoff@st.com> wrote:
> >
> > Protect STM32 CRC device from concurrent accesses.
> >
> > As we create a spinlocked section that increase with buffer size, we
> > provide a module parameter to release the pressure by splitting
> > critical section in chunks.
> >
> > Size of each chunk is defined in burst_size module parameter.
> > By default burst_size=0, i.e. don't split incoming buffer.
> >
> > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> 
> Would you mind explaining the usage model here? It looks like you are sharing a
> CRC hardware accelerator with a synchronous interface between different users
> by using spinlocks? You are aware that this will tie up the waiting CPUs
> completely during this time, right? So it would be much better to use a mutex
> here. Or perhaps it would make more sense to fall back to a s/w based CRC
> routine if the h/w is tied up working for another task?

I know mutex are more acceptable here, but shash _update() and _init() may be call 
from any context, and so I cannot take a mutex.
And to protect my concurrent HW access I only though about spinlock. Due to possible
constraint on CPUs, I add a burst_size option to force slitting long buffer into smaller one,
and so decrease time we take the lock.
But I didn't though to fallback to software CRC.

I'll do a patch on top.
In in the burst_update() function I'll use a spin_trylock_irqsave() and use software CRC32 if HW is already in use.

Thanks and regards,
Nicolas.

> Using spinlocks for this is really not acceptable.
> 
> 
> 
> > ---
> >  drivers/crypto/stm32/stm32-crc32.c | 47
> > ++++++++++++++++++++++++++++--
> >  1 file changed, 45 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/crypto/stm32/stm32-crc32.c
> > b/drivers/crypto/stm32/stm32-crc32.c
> > index 413415c216ef..3ba41148c2a4 100644
> > --- a/drivers/crypto/stm32/stm32-crc32.c
> > +++ b/drivers/crypto/stm32/stm32-crc32.c
> > @@ -35,11 +35,16 @@
> >
> >  #define CRC_AUTOSUSPEND_DELAY  50
> >
> > +static unsigned int burst_size;
> > +module_param(burst_size, uint, 0644); MODULE_PARM_DESC(burst_size,
> > +"Select burst byte size (0 unlimited)");
> > +
> >  struct stm32_crc {
> >         struct list_head list;
> >         struct device    *dev;
> >         void __iomem     *regs;
> >         struct clk       *clk;
> > +       spinlock_t       lock;
> >  };
> >
> >  struct stm32_crc_list {
> > @@ -109,6 +114,7 @@ static int stm32_crc_init(struct shash_desc *desc)
> >         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
> >         struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
> >         struct stm32_crc *crc;
> > +       unsigned long flags;
> >
> >         crc = stm32_crc_get_next_crc();
> >         if (!crc)
> > @@ -116,6 +122,8 @@ static int stm32_crc_init(struct shash_desc *desc)
> >
> >         pm_runtime_get_sync(crc->dev);
> >
> > +       spin_lock_irqsave(&crc->lock, flags);
> > +
> >         /* Reset, set key, poly and configure in bit reverse mode */
> >         writel_relaxed(bitrev32(mctx->key), crc->regs + CRC_INIT);
> >         writel_relaxed(bitrev32(mctx->poly), crc->regs + CRC_POL); @@
> > -125,18 +133,21 @@ static int stm32_crc_init(struct shash_desc *desc)
> >         /* Store partial result */
> >         ctx->partial = readl_relaxed(crc->regs + CRC_DR);
> >
> > +       spin_unlock_irqrestore(&crc->lock, flags);
> > +
> >         pm_runtime_mark_last_busy(crc->dev);
> >         pm_runtime_put_autosuspend(crc->dev);
> >
> >         return 0;
> >  }
> >
> > -static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
> > -                           unsigned int length)
> > +static int burst_update(struct shash_desc *desc, const u8 *d8,
> > +                       size_t length)
> >  {
> >         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc);
> >         struct stm32_crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
> >         struct stm32_crc *crc;
> > +       unsigned long flags;
> >
> >         crc = stm32_crc_get_next_crc();
> >         if (!crc)
> > @@ -144,6 +155,8 @@ static int stm32_crc_update(struct shash_desc
> > *desc, const u8 *d8,
> >
> >         pm_runtime_get_sync(crc->dev);
> >
> > +       spin_lock_irqsave(&crc->lock, flags);
> > +
> >         /*
> >          * Restore previously calculated CRC for this context as init value
> >          * Restore polynomial configuration @@ -182,12 +195,40 @@
> > static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
> >         /* Store partial result */
> >         ctx->partial = readl_relaxed(crc->regs + CRC_DR);
> >
> > +       spin_unlock_irqrestore(&crc->lock, flags);
> > +
> >         pm_runtime_mark_last_busy(crc->dev);
> >         pm_runtime_put_autosuspend(crc->dev);
> >
> >         return 0;
> >  }
> >
> > +static int stm32_crc_update(struct shash_desc *desc, const u8 *d8,
> > +                           unsigned int length) {
> > +       const unsigned int burst_sz = burst_size;
> > +       unsigned int rem_sz;
> > +       const u8 *cur;
> > +       size_t size;
> > +       int ret;
> > +
> > +       if (!burst_sz)
> > +               return burst_update(desc, d8, length);
> > +
> > +       /* Digest first bytes not 32bit aligned at first pass in the loop */
> > +       size = min(length,
> > +                  burst_sz + (unsigned int)d8 - ALIGN_DOWN((unsigned int)d8,
> > +                                                           sizeof(u32)));
> > +       for (rem_sz = length, cur = d8; rem_sz;
> > +            rem_sz -= size, cur += size, size = min(rem_sz, burst_sz)) {
> > +               ret = burst_update(desc, cur, size);
> > +               if (ret)
> > +                       return ret;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> >  static int stm32_crc_final(struct shash_desc *desc, u8 *out)  {
> >         struct stm32_crc_desc_ctx *ctx = shash_desc_ctx(desc); @@
> > -300,6 +341,8 @@ static int stm32_crc_probe(struct platform_device *pdev)
> >         pm_runtime_irq_safe(dev);
> >         pm_runtime_enable(dev);
> >
> > +       spin_lock_init(&crc->lock);
> > +
> >         platform_set_drvdata(pdev, crc);
> >
> >         spin_lock(&crc_list.lock);
> > --
> > 2.17.1
> >

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-25  7:24     ` Nicolas TOROMANOFF
@ 2020-05-25  7:46       ` Ard Biesheuvel
  2020-05-25  9:01         ` Nicolas TOROMANOFF
  0 siblings, 1 reply; 14+ messages in thread
From: Ard Biesheuvel @ 2020-05-25  7:46 UTC (permalink / raw)
  To: Nicolas TOROMANOFF
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre TORGUE,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

On Mon, 25 May 2020 at 09:24, Nicolas TOROMANOFF
<nicolas.toromanoff@st.com> wrote:
>
> Hello,
>
> > -----Original Message-----
> > From: Ard Biesheuvel <ardb@kernel.org>
> > Sent: Friday, May 22, 2020 6:12 PM>
> > On Tue, 12 May 2020 at 16:13, Nicolas Toromanoff
> > <nicolas.toromanoff@st.com> wrote:
> > >
> > > Protect STM32 CRC device from concurrent accesses.
> > >
> > > As we create a spinlocked section that increase with buffer size, we
> > > provide a module parameter to release the pressure by splitting
> > > critical section in chunks.
> > >
> > > Size of each chunk is defined in burst_size module parameter.
> > > By default burst_size=0, i.e. don't split incoming buffer.
> > >
> > > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> >
> > Would you mind explaining the usage model here? It looks like you are sharing a
> > CRC hardware accelerator with a synchronous interface between different users
> > by using spinlocks? You are aware that this will tie up the waiting CPUs
> > completely during this time, right? So it would be much better to use a mutex
> > here. Or perhaps it would make more sense to fall back to a s/w based CRC
> > routine if the h/w is tied up working for another task?
>
> I know mutex are more acceptable here, but shash _update() and _init() may be call
> from any context, and so I cannot take a mutex.
> And to protect my concurrent HW access I only though about spinlock. Due to possible
> constraint on CPUs, I add a burst_size option to force slitting long buffer into smaller one,
> and so decrease time we take the lock.
> But I didn't though to fallback to software CRC.
>
> I'll do a patch on top.
> In in the burst_update() function I'll use a spin_trylock_irqsave() and use software CRC32 if HW is already in use.
>

Right. I didn't even notice that you were keeping interrupts disabled
the whole time when using the h/w block. That means that any serious
use of this h/w block will make IRQ latency go through the roof.

I recommend that you go back to the drawing board on this driver,
rather than papering over the issues with a spin_trylock(). Perhaps it
would be better to model it as a ahash (even though the h/w block
itself is synchronous) and use a kthread to feed in the data.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-25  7:46       ` Ard Biesheuvel
@ 2020-05-25  9:01         ` Nicolas TOROMANOFF
  2020-05-25  9:07           ` Ard Biesheuvel
  0 siblings, 1 reply; 14+ messages in thread
From: Nicolas TOROMANOFF @ 2020-05-25  9:01 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre TORGUE,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Monday, May 25, 2020 9:46 AM
> To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>
> Subject: Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
> 
> On Mon, 25 May 2020 at 09:24, Nicolas TOROMANOFF
> <nicolas.toromanoff@st.com> wrote:
> >
> > Hello,
> >
> > > -----Original Message-----
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > > Sent: Friday, May 22, 2020 6:12 PM>
> > > On Tue, 12 May 2020 at 16:13, Nicolas Toromanoff
> > > <nicolas.toromanoff@st.com> wrote:
> > > >
> > > > Protect STM32 CRC device from concurrent accesses.
> > > >
> > > > As we create a spinlocked section that increase with buffer size,
> > > > we provide a module parameter to release the pressure by splitting
> > > > critical section in chunks.
> > > >
> > > > Size of each chunk is defined in burst_size module parameter.
> > > > By default burst_size=0, i.e. don't split incoming buffer.
> > > >
> > > > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> > >
> > > Would you mind explaining the usage model here? It looks like you
> > > are sharing a CRC hardware accelerator with a synchronous interface
> > > between different users by using spinlocks? You are aware that this
> > > will tie up the waiting CPUs completely during this time, right? So
> > > it would be much better to use a mutex here. Or perhaps it would
> > > make more sense to fall back to a s/w based CRC routine if the h/w is tied up
> working for another task?
> >
> > I know mutex are more acceptable here, but shash _update() and _init()
> > may be call from any context, and so I cannot take a mutex.
> > And to protect my concurrent HW access I only though about spinlock.
> > Due to possible constraint on CPUs, I add a burst_size option to force
> > slitting long buffer into smaller one, and so decrease time we take the lock.
> > But I didn't though to fallback to software CRC.
> >
> > I'll do a patch on top.
> > In in the burst_update() function I'll use a spin_trylock_irqsave() and use
> software CRC32 if HW is already in use.
> >
> 
> Right. I didn't even notice that you were keeping interrupts disabled the whole
> time when using the h/w block. That means that any serious use of this h/w
> block will make IRQ latency go through the roof.
> 
> I recommend that you go back to the drawing board on this driver, rather than
> papering over the issues with a spin_trylock(). Perhaps it would be better to
> model it as a ahash (even though the h/w block itself is synchronous) and use a
> kthread to feed in the data.

I thought when I updated the driver to move to a ahash interface, but the main usage
of crc32 is the ext4 fs, that calls the shash API.
Commit 877b5691f27a ("crypto: shash - remove shash_desc::flags") removed possibility
to sleep in shash callback. (before this commit and with MAY_SLEEP option set, using 
a mutex may have been fine).

By now the solution I see is to use the spin_trylock_irqsave(), fallback to software crc *AND* capping burst_size
to ensure the locked section stay reasonable.

Does this seems acceptable ?

Nicolas.
 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-25  9:01         ` Nicolas TOROMANOFF
@ 2020-05-25  9:07           ` Ard Biesheuvel
  2020-05-25 11:49             ` Nicolas TOROMANOFF
  0 siblings, 1 reply; 14+ messages in thread
From: Ard Biesheuvel @ 2020-05-25  9:07 UTC (permalink / raw)
  To: Nicolas TOROMANOFF, Eric Biggers
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre TORGUE,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

(+ Eric)

On Mon, 25 May 2020 at 11:01, Nicolas TOROMANOFF
<nicolas.toromanoff@st.com> wrote:
>
> > -----Original Message-----
> > From: Ard Biesheuvel <ardb@kernel.org>
> > Sent: Monday, May 25, 2020 9:46 AM
> > To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>
> > Subject: Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
> >
> > On Mon, 25 May 2020 at 09:24, Nicolas TOROMANOFF
> > <nicolas.toromanoff@st.com> wrote:
> > >
> > > Hello,
> > >
> > > > -----Original Message-----
> > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > Sent: Friday, May 22, 2020 6:12 PM>
> > > > On Tue, 12 May 2020 at 16:13, Nicolas Toromanoff
> > > > <nicolas.toromanoff@st.com> wrote:
> > > > >
> > > > > Protect STM32 CRC device from concurrent accesses.
> > > > >
> > > > > As we create a spinlocked section that increase with buffer size,
> > > > > we provide a module parameter to release the pressure by splitting
> > > > > critical section in chunks.
> > > > >
> > > > > Size of each chunk is defined in burst_size module parameter.
> > > > > By default burst_size=0, i.e. don't split incoming buffer.
> > > > >
> > > > > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> > > >
> > > > Would you mind explaining the usage model here? It looks like you
> > > > are sharing a CRC hardware accelerator with a synchronous interface
> > > > between different users by using spinlocks? You are aware that this
> > > > will tie up the waiting CPUs completely during this time, right? So
> > > > it would be much better to use a mutex here. Or perhaps it would
> > > > make more sense to fall back to a s/w based CRC routine if the h/w is tied up
> > working for another task?
> > >
> > > I know mutex are more acceptable here, but shash _update() and _init()
> > > may be call from any context, and so I cannot take a mutex.
> > > And to protect my concurrent HW access I only though about spinlock.
> > > Due to possible constraint on CPUs, I add a burst_size option to force
> > > slitting long buffer into smaller one, and so decrease time we take the lock.
> > > But I didn't though to fallback to software CRC.
> > >
> > > I'll do a patch on top.
> > > In in the burst_update() function I'll use a spin_trylock_irqsave() and use
> > software CRC32 if HW is already in use.
> > >
> >
> > Right. I didn't even notice that you were keeping interrupts disabled the whole
> > time when using the h/w block. That means that any serious use of this h/w
> > block will make IRQ latency go through the roof.
> >
> > I recommend that you go back to the drawing board on this driver, rather than
> > papering over the issues with a spin_trylock(). Perhaps it would be better to
> > model it as a ahash (even though the h/w block itself is synchronous) and use a
> > kthread to feed in the data.
>
> I thought when I updated the driver to move to a ahash interface, but the main usage
> of crc32 is the ext4 fs, that calls the shash API.
> Commit 877b5691f27a ("crypto: shash - remove shash_desc::flags") removed possibility
> to sleep in shash callback. (before this commit and with MAY_SLEEP option set, using
> a mutex may have been fine).
>

According to that commit's log, sleeping is never fine for shash(),
since it uses kmap_atomic() when iterating over the scatterlist.

> By now the solution I see is to use the spin_trylock_irqsave(), fallback to software crc *AND* capping burst_size
> to ensure the locked section stay reasonable.
>
> Does this seems acceptable ?
>

If the reason for disabling interrupts is to avoid deadlocks, wouldn't
the switch to trylock() with a software fallback allow us to keep
interrupts enabled?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-25  9:07           ` Ard Biesheuvel
@ 2020-05-25 11:49             ` Nicolas TOROMANOFF
  2020-05-25 12:01               ` Ard Biesheuvel
  0 siblings, 1 reply; 14+ messages in thread
From: Nicolas TOROMANOFF @ 2020-05-25 11:49 UTC (permalink / raw)
  To: Ard Biesheuvel, Eric Biggers
  Cc: Herbert Xu, David S . Miller, Maxime Coquelin, Alexandre TORGUE,
	Linux Crypto Mailing List, linux-stm32, Linux ARM,
	Linux Kernel Mailing List

> -----Original Message-----
> From: Ard Biesheuvel <ardb@kernel.org>
> Sent: Monday, May 25, 2020 11:07 AM
> To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>; Eric Biggers
> <ebiggers@kernel.org>
> On Mon, 25 May 2020 at 11:01, Nicolas TOROMANOFF
> <nicolas.toromanoff@st.com> wrote:
> >
> > > -----Original Message-----
> > > From: Ard Biesheuvel <ardb@kernel.org>
> > > Sent: Monday, May 25, 2020 9:46 AM
> > > To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>
> > > Subject: Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent
> > > accesses
> > >
> > > On Mon, 25 May 2020 at 09:24, Nicolas TOROMANOFF
> > > <nicolas.toromanoff@st.com> wrote:
> > > >
> > > > Hello,
> > > >
> > > > > -----Original Message-----
> > > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > > Sent: Friday, May 22, 2020 6:12 PM> On Tue, 12 May 2020 at
> > > > > 16:13, Nicolas Toromanoff <nicolas.toromanoff@st.com> wrote:
> > > > > >
> > > > > > Protect STM32 CRC device from concurrent accesses.
> > > > > >
> > > > > > As we create a spinlocked section that increase with buffer
> > > > > > size, we provide a module parameter to release the pressure by
> > > > > > splitting critical section in chunks.
> > > > > >
> > > > > > Size of each chunk is defined in burst_size module parameter.
> > > > > > By default burst_size=0, i.e. don't split incoming buffer.
> > > > > >
> > > > > > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> > > > >
> > > > > Would you mind explaining the usage model here? It looks like
> > > > > you are sharing a CRC hardware accelerator with a synchronous
> > > > > interface between different users by using spinlocks? You are
> > > > > aware that this will tie up the waiting CPUs completely during
> > > > > this time, right? So it would be much better to use a mutex
> > > > > here. Or perhaps it would make more sense to fall back to a s/w
> > > > > based CRC routine if the h/w is tied up
> > > working for another task?
> > > >
> > > > I know mutex are more acceptable here, but shash _update() and
> > > > _init() may be call from any context, and so I cannot take a mutex.
> > > > And to protect my concurrent HW access I only though about spinlock.
> > > > Due to possible constraint on CPUs, I add a burst_size option to
> > > > force slitting long buffer into smaller one, and so decrease time we take
> the lock.
> > > > But I didn't though to fallback to software CRC.
> > > >
> > > > I'll do a patch on top.
> > > > In in the burst_update() function I'll use a
> > > > spin_trylock_irqsave() and use
> > > software CRC32 if HW is already in use.
> > > >
> > >
> > > Right. I didn't even notice that you were keeping interrupts
> > > disabled the whole time when using the h/w block. That means that
> > > any serious use of this h/w block will make IRQ latency go through the roof.
> > >
> > > I recommend that you go back to the drawing board on this driver,
> > > rather than papering over the issues with a spin_trylock(). Perhaps
> > > it would be better to model it as a ahash (even though the h/w block
> > > itself is synchronous) and use a kthread to feed in the data.
> >
> > I thought when I updated the driver to move to a ahash interface, but
> > the main usage of crc32 is the ext4 fs, that calls the shash API.
> > Commit 877b5691f27a ("crypto: shash - remove shash_desc::flags")
> > removed possibility to sleep in shash callback. (before this commit
> > and with MAY_SLEEP option set, using a mutex may have been fine).
> >
> 
> According to that commit's log, sleeping is never fine for shash(), since it uses
> kmap_atomic() when iterating over the scatterlist.

Today, we could avoid using kmap_atomic() in shash_ashash_*() APIs (the
ones that Walks through the scatterlist) by using the
crypto_ahash_walk_first() function to initialize the shash_ahash walker
(note that this function is never call in current kernel source [to remove ?]).
Then shash_ahash_*() functions will call ahash_*() function that use kmap()
if (walk->flags & CRYPTO_ALG_ASYNC) [flag set by crypto_ahash_walk_first()]
The last kmap_atomic() will be in the shash_ahash_digest() call in the
optimize branch (that should be replaced by the no atomic one)

I didn't investigate more this way, because I didn't check the drawback of
using kmap() instead of kmap_atomic(), I wanted to avoid modifying behavior
of other drivers and using a function never use elsewhere in kernel scarred
me ;-).
If these updates correct visible bugs in the ahash usage of the stm32-crc32
code [no more "sleep while atomic" traces even with mutex in tests], 
Documentation states that shash API could be called from any context,
I cannot add mutex in them.

> > By now the solution I see is to use the spin_trylock_irqsave(),
> > fallback to software crc *AND* capping burst_size to ensure the locked
> section stay reasonable.
> >
> > Does this seems acceptable ?
> >
> 
> If the reason for disabling interrupts is to avoid deadlocks, wouldn't the switch
> to trylock() with a software fallback allow us to keep interrupts enabled?

Right, with the trylock, I don't see why we may need to mask interrupts.



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses
  2020-05-25 11:49             ` Nicolas TOROMANOFF
@ 2020-05-25 12:01               ` Ard Biesheuvel
  0 siblings, 0 replies; 14+ messages in thread
From: Ard Biesheuvel @ 2020-05-25 12:01 UTC (permalink / raw)
  To: Nicolas TOROMANOFF
  Cc: Eric Biggers, Herbert Xu, David S . Miller, Maxime Coquelin,
	Alexandre TORGUE, Linux Crypto Mailing List, linux-stm32,
	Linux ARM, Linux Kernel Mailing List

On Mon, 25 May 2020 at 13:49, Nicolas TOROMANOFF
<nicolas.toromanoff@st.com> wrote:
>
> > -----Original Message-----
> > From: Ard Biesheuvel <ardb@kernel.org>
> > Sent: Monday, May 25, 2020 11:07 AM
> > To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>; Eric Biggers
> > <ebiggers@kernel.org>
> > On Mon, 25 May 2020 at 11:01, Nicolas TOROMANOFF
> > <nicolas.toromanoff@st.com> wrote:
> > >
> > > > -----Original Message-----
> > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > Sent: Monday, May 25, 2020 9:46 AM
> > > > To: Nicolas TOROMANOFF <nicolas.toromanoff@st.com>
> > > > Subject: Re: [PATCH 5/5] crypto: stm32/crc: protect from concurrent
> > > > accesses
> > > >
> > > > On Mon, 25 May 2020 at 09:24, Nicolas TOROMANOFF
> > > > <nicolas.toromanoff@st.com> wrote:
> > > > >
> > > > > Hello,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Ard Biesheuvel <ardb@kernel.org>
> > > > > > Sent: Friday, May 22, 2020 6:12 PM> On Tue, 12 May 2020 at
> > > > > > 16:13, Nicolas Toromanoff <nicolas.toromanoff@st.com> wrote:
> > > > > > >
> > > > > > > Protect STM32 CRC device from concurrent accesses.
> > > > > > >
> > > > > > > As we create a spinlocked section that increase with buffer
> > > > > > > size, we provide a module parameter to release the pressure by
> > > > > > > splitting critical section in chunks.
> > > > > > >
> > > > > > > Size of each chunk is defined in burst_size module parameter.
> > > > > > > By default burst_size=0, i.e. don't split incoming buffer.
> > > > > > >
> > > > > > > Signed-off-by: Nicolas Toromanoff <nicolas.toromanoff@st.com>
> > > > > >
> > > > > > Would you mind explaining the usage model here? It looks like
> > > > > > you are sharing a CRC hardware accelerator with a synchronous
> > > > > > interface between different users by using spinlocks? You are
> > > > > > aware that this will tie up the waiting CPUs completely during
> > > > > > this time, right? So it would be much better to use a mutex
> > > > > > here. Or perhaps it would make more sense to fall back to a s/w
> > > > > > based CRC routine if the h/w is tied up
> > > > working for another task?
> > > > >
> > > > > I know mutex are more acceptable here, but shash _update() and
> > > > > _init() may be call from any context, and so I cannot take a mutex.
> > > > > And to protect my concurrent HW access I only though about spinlock.
> > > > > Due to possible constraint on CPUs, I add a burst_size option to
> > > > > force slitting long buffer into smaller one, and so decrease time we take
> > the lock.
> > > > > But I didn't though to fallback to software CRC.
> > > > >
> > > > > I'll do a patch on top.
> > > > > In in the burst_update() function I'll use a
> > > > > spin_trylock_irqsave() and use
> > > > software CRC32 if HW is already in use.
> > > > >
> > > >
> > > > Right. I didn't even notice that you were keeping interrupts
> > > > disabled the whole time when using the h/w block. That means that
> > > > any serious use of this h/w block will make IRQ latency go through the roof.
> > > >
> > > > I recommend that you go back to the drawing board on this driver,
> > > > rather than papering over the issues with a spin_trylock(). Perhaps
> > > > it would be better to model it as a ahash (even though the h/w block
> > > > itself is synchronous) and use a kthread to feed in the data.
> > >
> > > I thought when I updated the driver to move to a ahash interface, but
> > > the main usage of crc32 is the ext4 fs, that calls the shash API.
> > > Commit 877b5691f27a ("crypto: shash - remove shash_desc::flags")
> > > removed possibility to sleep in shash callback. (before this commit
> > > and with MAY_SLEEP option set, using a mutex may have been fine).
> > >
> >
> > According to that commit's log, sleeping is never fine for shash(), since it uses
> > kmap_atomic() when iterating over the scatterlist.
>
> Today, we could avoid using kmap_atomic() in shash_ashash_*() APIs (the
> ones that Walks through the scatterlist) by using the
> crypto_ahash_walk_first() function to initialize the shash_ahash walker
> (note that this function is never call in current kernel source [to remove ?]).
> Then shash_ahash_*() functions will call ahash_*() function that use kmap()
> if (walk->flags & CRYPTO_ALG_ASYNC) [flag set by crypto_ahash_walk_first()]
> The last kmap_atomic() will be in the shash_ahash_digest() call in the
> optimize branch (that should be replaced by the no atomic one)
>
> I didn't investigate more this way, because I didn't check the drawback of
> using kmap() instead of kmap_atomic(), I wanted to avoid modifying behavior
> of other drivers and using a function never use elsewhere in kernel scarred
> me ;-).
> If these updates correct visible bugs in the ahash usage of the stm32-crc32
> code [no more "sleep while atomic" traces even with mutex in tests],
> Documentation states that shash API could be called from any context,
> I cannot add mutex in them.
>
> > > By now the solution I see is to use the spin_trylock_irqsave(),
> > > fallback to software crc *AND* capping burst_size to ensure the locked
> > section stay reasonable.
> > >
> > > Does this seems acceptable ?
> > >
> >
> > If the reason for disabling interrupts is to avoid deadlocks, wouldn't the switch
> > to trylock() with a software fallback allow us to keep interrupts enabled?
>
> Right, with the trylock, I don't see why we may need to mask interrupts.
>
>

OK, then the fix should be easy.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, back to index

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-12 14:11 [PATCH 0/5] STM32 CRC update Nicolas Toromanoff
2020-05-12 14:11 ` [PATCH 1/5] crypto: stm32/crc: fix ext4 chksum BUG_ON() Nicolas Toromanoff
2020-05-12 14:11 ` [PATCH 2/5] crypto: stm32/crc: fix run-time self test issue Nicolas Toromanoff
2020-05-12 14:11 ` [PATCH 3/5] crypto: stm32/crc: fix multi-instance Nicolas Toromanoff
2020-05-12 14:11 ` [PATCH 4/5] crypto: stm32/crc: don't sleep in runtime pm Nicolas Toromanoff
2020-05-12 14:11 ` [PATCH 5/5] crypto: stm32/crc: protect from concurrent accesses Nicolas Toromanoff
2020-05-22 16:11   ` Ard Biesheuvel
2020-05-25  7:24     ` Nicolas TOROMANOFF
2020-05-25  7:46       ` Ard Biesheuvel
2020-05-25  9:01         ` Nicolas TOROMANOFF
2020-05-25  9:07           ` Ard Biesheuvel
2020-05-25 11:49             ` Nicolas TOROMANOFF
2020-05-25 12:01               ` Ard Biesheuvel
2020-05-22 14:13 ` [PATCH 0/5] STM32 CRC update Herbert Xu

Linux-Crypto Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-crypto/0 linux-crypto/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-crypto linux-crypto/ https://lore.kernel.org/linux-crypto \
		linux-crypto@vger.kernel.org
	public-inbox-index linux-crypto

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-crypto


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git