* [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
@ 2009-10-15  6:41 Vishnu Suresh
  2009-10-15  6:41   ` Vishnu Suresh
  2009-10-16  1:25   ` Dan Williams
  0 siblings, 2 replies; 18+ messages in thread
From: Vishnu Suresh @ 2009-10-15  6:41 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto, linux-kernel, linux-raid, herbert,
	dan.j.williams
  Cc: Vishnu Suresh

This patch disables use of the DMA_INTERRUPT capability when Async_tx is enabled.

The fsldma driver produces a null transfer for the DMA_INTERRUPT
capability when used with Async_tx. When RAID devices queue
a transaction via Async_tx, this results in a hang.
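
For context, the async_tx request that exercises this capability from a RAID
consumer looks roughly like the sketch below (hypothetical helper, assuming the
async_submit_ctl interface of this kernel series; this is not code from the patch):

#include <linux/async_tx.h>

/*
 * Queue a completion callback behind a previous async_tx operation.
 * When a DMA_INTERRUPT-capable channel is available, async_tx services
 * this with a NULL-transfer descriptor -- on fsldma that is the request
 * which ends up hanging.
 */
static void queue_stripe_callback(struct dma_async_tx_descriptor *depend_tx,
                                  dma_async_tx_callback cb, void *cb_arg)
{
        struct async_submit_ctl submit;

        init_async_submit(&submit, ASYNC_TX_ACK, depend_tx, cb, cb_arg, NULL);
        async_trigger_callback(&submit);
}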

Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
---
 drivers/dma/fsldma.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 296f9e7..66d9b39 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -1200,7 +1200,13 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 						- fdev->reg.start + 1);
 
 	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+#ifndef CONFIG_ASYNC_CORE
+	/*
+	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+	 * trigger a PE interrupt.
+	 */
 	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+#endif
 	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
 	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
 	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
-- 
1.6.4.2

* [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
  2009-10-15  6:41 [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled Vishnu Suresh
@ 2009-10-15  6:41   ` Vishnu Suresh
  2009-10-16  1:25   ` Dan Williams
  1 sibling, 0 replies; 18+ messages in thread
From: Vishnu Suresh @ 2009-10-15  6:41 UTC (permalink / raw)
  To: linuxppc-dev, linux-crypto, linux-kernel, linux-raid, herbert,
	dan.j.williams
  Cc: Dipen Dudhat, Maneesh Gupta, Vishnu Suresh

Expose the Talitos XOR functionality for RAID parity
calculation via the Async_tx layer.

Thanks to Surender Kumar and Lee Nipper for their help in
realising this driver.
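
For illustration, a client could drive the XOR channel registered here through
the standard dmaengine entry points, roughly as sketched below (hypothetical
helper; md normally reaches the channel through async_xor() rather than calling
it directly):

#include <linux/dmaengine.h>

/*
 * Hypothetical illustration: XOR src_cnt DMA-mapped sources into dest
 * on a DMA_XOR-capable channel such as the one this patch registers.
 */
static int xor_one_block(struct dma_chan *chan, dma_addr_t dest,
                         dma_addr_t *srcs, unsigned int src_cnt, size_t len)
{
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx;
        dma_cookie_t cookie;

        tx = dev->device_prep_dma_xor(chan, dest, srcs, src_cnt, len,
                                      DMA_PREP_INTERRUPT);
        if (!tx)
                return -ENOMEM;

        cookie = tx->tx_submit(tx);
        if (dma_submit_error(cookie))
                return -EIO;

        dma_async_issue_pending(chan);
        return 0;
}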

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Dipen Dudhat <Dipen.Dudhat@freescale.com>
Signed-off-by: Maneesh Gupta <Maneesh.Gupta@freescale.com>
Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
---
 drivers/crypto/Kconfig   |    2 +
 drivers/crypto/talitos.c |  423 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/crypto/talitos.h |    2 +
 3 files changed, 426 insertions(+), 1 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index b08403d..343e578 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
 	select CRYPTO_ALGAPI
 	select CRYPTO_AUTHENC
 	select HW_RANDOM
+	select DMA_ENGINE
+	select ASYNC_XOR
 	depends on FSL_SOC
 	help
 	  Say 'Y' here to use the Freescale Security Engine (SEC)
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c47ffe8..84819d4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1,7 +1,7 @@
 /*
  * talitos - Freescale Integrated Security Engine (SEC) device driver
  *
- * Copyright (c) 2008 Freescale Semiconductor, Inc.
+ * Copyright (c) 2008-2009 Freescale Semiconductor, Inc.
  *
  * Scatterlist Crypto API glue code copied from files with the following:
  * Copyright (c) 2006-2007 Herbert Xu <herbert@gondor.apana.org.au>
@@ -37,6 +37,8 @@
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/rtnetlink.h>
+#include <linux/dmaengine.h>
+#include <linux/raid/xor.h>
 
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
@@ -140,6 +142,9 @@ struct talitos_private {
 
 	/* hwrng device */
 	struct hwrng rng;
+
+	/* XOR Device */
+	struct dma_device dma_dev_common;
 };
 
 /* .features flag */
@@ -685,6 +690,401 @@ static void talitos_unregister_rng(struct device *dev)
 }
 
 /*
+ * async_tx interface for XOR-capable SECs
+ *
+ * Dipen Dudhat <Dipen.Dudhat@freescale.com>
+ * Maneesh Gupta <Maneesh.Gupta@freescale.com>
+ * Vishnu Suresh <Vishnu@freescale.com>
+ */
+
+/**
+ * talitos_xor_chan - context management for the async_tx channel
+ * @completed_cookie: the last completed cookie
+ * @desc_lock: lock for tx queue
+ * @total_desc: number of descriptors allocated
+ * @submit_q: queue of submitted descriptors
+ * @pending_q: queue of pending descriptors
+ * @in_progress_q: queue of descriptors in progress
+ * @free_desc: queue of unused descriptors
+ * @dev: talitos device implementing this channel
+ * @common: the corresponding xor channel in async_tx
+ */
+struct talitos_xor_chan {
+	dma_cookie_t completed_cookie;
+	spinlock_t desc_lock;
+	unsigned int total_desc;
+	struct list_head submit_q;
+	struct list_head pending_q;
+	struct list_head in_progress_q;
+	struct list_head free_desc;
+	struct device *dev;
+	struct dma_chan common;
+};
+
+/**
+ * talitos_xor_desc - software xor descriptor
+ * @async_tx: the referring async_tx descriptor
+ * @node: list node linking the descriptor into one of the channel queues
+ * @hwdesc: h/w descriptor
+ */
+struct talitos_xor_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head tx_list;
+	struct list_head node;
+	struct talitos_desc hwdesc;
+};
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error);
+
+static enum dma_status talitos_is_tx_complete(struct dma_chan *chan,
+					      dma_cookie_t cookie,
+					      dma_cookie_t *done,
+					      dma_cookie_t *used)
+{
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	last_used = chan->cookie;
+	last_complete = xor_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
+{
+	struct talitos_xor_desc *desc, *_desc;
+	unsigned long flags;
+	int status;
+
+	spin_lock_irqsave(&xor_chan->desc_lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		status = talitos_submit(xor_chan->dev, &desc->hwdesc,
+					talitos_release_xor, desc);
+		if (status != -EINPROGRESS)
+			break;
+
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &xor_chan->in_progress_q);
+	}
+
+	spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
+}
+
+static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
+				void *context, int error)
+{
+	struct talitos_xor_desc *desc = context;
+	struct talitos_xor_chan *xor_chan;
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	if (unlikely(error)) {
+		dev_err(dev, "xor operation: talitos error %d\n", error);
+		BUG();
+	}
+
+	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
+				common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (xor_chan->completed_cookie < desc->async_tx.cookie)
+		xor_chan->completed_cookie = desc->async_tx.cookie;
+
+	callback = desc->async_tx.callback;
+	callback_param = desc->async_tx.callback_param;
+
+	if (callback) {
+		spin_unlock_bh(&xor_chan->desc_lock);
+		callback(callback_param);
+		spin_lock_bh(&xor_chan->desc_lock);
+	}
+
+	list_del(&desc->node);
+	list_add_tail(&desc->node, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->pending_q))
+		talitos_process_pending(xor_chan);
+}
+
+/**
+ * talitos_issue_pending - move the descriptors from the submit
+ * queue to the pending queue and submit them for processing
+ * @chan: DMA channel
+ */
+static void talitos_issue_pending(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_tail_init(&xor_chan->submit_q,
+				 &xor_chan->pending_q);
+	spin_unlock_bh(&xor_chan->desc_lock);
+	talitos_process_pending(xor_chan);
+}
+
+static dma_cookie_t talitos_async_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct talitos_xor_desc *desc;
+	struct talitos_xor_chan *xor_chan;
+	dma_cookie_t cookie;
+
+	desc = container_of(tx, struct talitos_xor_desc, async_tx);
+	xor_chan = container_of(tx->chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	cookie = xor_chan->common.cookie + 1;
+	if (cookie < 0)
+		cookie = 1;
+
+	desc->async_tx.cookie = cookie;
+	xor_chan->common.cookie = desc->async_tx.cookie;
+
+	list_splice_tail_init(&desc->tx_list,
+				 &xor_chan->submit_q);
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return cookie;
+}
+
+static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
+				struct talitos_xor_chan *xor_chan, gfp_t flags)
+{
+	struct talitos_xor_desc *desc;
+
+	desc = kmalloc(sizeof(*desc), flags);
+	if (desc) {
+		xor_chan->total_desc++;
+		desc->async_tx.tx_submit = talitos_async_tx_submit;
+	}
+
+	return desc;
+}
+
+static void talitos_free_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc, *_desc;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+
+	list_for_each_entry_safe(desc, _desc, &xor_chan->submit_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->in_progress_q, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	list_for_each_entry_safe(desc, _desc, &xor_chan->free_desc, node) {
+		list_del(&desc->node);
+		xor_chan->total_desc--;
+		kfree(desc);
+	}
+	BUG_ON(unlikely(xor_chan->total_desc));	/* Some descriptor not freed? */
+
+	spin_unlock_bh(&xor_chan->desc_lock);
+}
+
+static int talitos_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	if (!list_empty(&xor_chan->free_desc))
+		return xor_chan->total_desc;
+
+	/* 256 initial descriptors */
+	for (i = 0; i < 256; i++) {
+		desc = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(xor_chan->common.device->dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->node, &tmp_list);
+	}
+
+	if (!i)
+		return -ENOMEM;
+
+	/* At least one desc is allocated */
+	spin_lock_bh(&xor_chan->desc_lock);
+	list_splice_init(&tmp_list, &xor_chan->free_desc);
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	return xor_chan->total_desc;
+}
+
+static struct dma_async_tx_descriptor *talitos_prep_dma_xor(
+			struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+			unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct talitos_xor_chan *xor_chan;
+	struct talitos_xor_desc *new;
+	struct talitos_desc *desc;
+	int i, j;
+
+	BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
+
+	xor_chan = container_of(chan, struct talitos_xor_chan, common);
+
+	spin_lock_bh(&xor_chan->desc_lock);
+	if (!list_empty(&xor_chan->free_desc)) {
+		new = container_of(xor_chan->free_desc.next,
+				   struct talitos_xor_desc, node);
+		list_del(&new->node);
+	} else {
+		new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
+	}
+	spin_unlock_bh(&xor_chan->desc_lock);
+
+	if (!new) {
+		dev_err(xor_chan->common.device->dev,
+			"No free memory for XOR DMA descriptor\n");
+		return NULL;
+	}
+	dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
+
+	INIT_LIST_HEAD(&new->node);
+	INIT_LIST_HEAD(&new->tx_list);
+
+	desc = &new->hwdesc;
+	/* Set destination: Last pointer pair */
+	to_talitos_ptr(&desc->ptr[6], dest);
+	desc->ptr[6].len = cpu_to_be16(len);
+	desc->ptr[6].j_extent = 0;
+
+	/* Set sources: fill backwards starting from the second-last pointer pair */
+	for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
+		to_talitos_ptr(&desc->ptr[i], src[j]);
+		desc->ptr[i].len = cpu_to_be16(len);
+		desc->ptr[i].j_extent = 0;
+	}
+
+	/*
+	 * documentation states first 0 ptr/len combo marks end of sources
+	 * yet device produces scatter boundary error unless all subsequent
+	 * sources are zeroed out
+	 */
+	for (; i >= 0; i--) {
+		to_talitos_ptr(&desc->ptr[i], 0);
+		desc->ptr[i].len = 0;
+		desc->ptr[i].j_extent = 0;
+	}
+
+	desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
+		    | DESC_HDR_TYPE_RAID_XOR;
+
+	new->async_tx.parent = NULL;
+	new->async_tx.next = NULL;
+	new->async_tx.cookie = 0;
+	async_tx_ack(&new->async_tx);
+
+	list_add_tail(&new->node, &new->tx_list);
+
+	new->async_tx.flags = flags;
+	new->async_tx.cookie = -EBUSY;
+
+	return &new->async_tx;
+}
+
+static void talitos_unregister_async_xor(struct device *dev)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct talitos_xor_chan *xor_chan;
+	struct dma_chan *chan;
+
+	if (priv->dma_dev_common.chancnt)
+		dma_async_device_unregister(&priv->dma_dev_common);
+
+	list_for_each_entry(chan, &priv->dma_dev_common.channels, device_node) {
+		xor_chan = container_of(chan, struct talitos_xor_chan, common);
+		list_del(&chan->device_node);
+		priv->dma_dev_common.chancnt--;
+		kfree(xor_chan);
+	}
+}
+
+/**
+ * talitos_register_async_tx - Initialize the Freescale XOR ADMA device
+ * It is registered as a DMA device with the capability to perform
+ * XOR operations through the Async_tx layer.
+ * The various queues and channel resources are also allocated.
+ */
+static int talitos_register_async_tx(struct device *dev, int max_xor_srcs)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	struct dma_device *dma_dev = &priv->dma_dev_common;
+	struct talitos_xor_chan *xor_chan;
+	int err;
+
+	xor_chan = kzalloc(sizeof(struct talitos_xor_chan), GFP_KERNEL);
+	if (!xor_chan) {
+		dev_err(dev, "unable to allocate xor channel\n");
+		return -ENOMEM;
+	}
+
+	dma_dev->dev = dev;
+	dma_dev->device_alloc_chan_resources = talitos_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = talitos_free_chan_resources;
+	dma_dev->device_prep_dma_xor = talitos_prep_dma_xor;
+	dma_dev->max_xor = max_xor_srcs;
+	dma_dev->device_is_tx_complete = talitos_is_tx_complete;
+	dma_dev->device_issue_pending = talitos_issue_pending;
+	INIT_LIST_HEAD(&dma_dev->channels);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+	xor_chan->dev = dev;
+	xor_chan->common.device = dma_dev;
+	xor_chan->total_desc = 0;
+	INIT_LIST_HEAD(&xor_chan->submit_q);
+	INIT_LIST_HEAD(&xor_chan->pending_q);
+	INIT_LIST_HEAD(&xor_chan->in_progress_q);
+	INIT_LIST_HEAD(&xor_chan->free_desc);
+	spin_lock_init(&xor_chan->desc_lock);
+
+	list_add_tail(&xor_chan->common.device_node, &dma_dev->channels);
+	dma_dev->chancnt++;
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(dev, "Unable to register XOR with Async_tx\n");
+		goto err_out;
+	}
+
+	return err;
+
+err_out:
+	talitos_unregister_async_xor(dev);
+	return err;
+}
+/*
  * crypto alg
  */
 #define TALITOS_CRA_PRIORITY		3000
@@ -1768,6 +2168,8 @@ static int talitos_remove(struct of_device *ofdev)
 	tasklet_kill(&priv->done_task);
 
 	iounmap(priv->reg);
+	if (priv->dma_dev_common.chancnt)
+		talitos_unregister_async_xor(dev);
 
 	dev_set_drvdata(dev, NULL);
 
@@ -1926,6 +2328,25 @@ static int talitos_probe(struct of_device *ofdev,
 			dev_info(dev, "hwrng\n");
 	}
 
+	/*
+	 * Register with async_tx as an XOR provider, if the hardware is capable.
+	 * SEC 2.x supports up to 3 RAID sources,
+	 * SEC 3.x supports up to 6.
+	 */
+	if (hw_supports(dev, DESC_HDR_SEL0_AESU | DESC_HDR_TYPE_RAID_XOR)) {
+		int max_xor_srcs = 3;
+		if (of_device_is_compatible(np, "fsl,sec3.0"))
+			max_xor_srcs = 6;
+
+		err = talitos_register_async_tx(dev, max_xor_srcs);
+		if (err) {
+			dev_err(dev, "failed to register async_tx xor: %d\n",
+				err);
+			goto err_out;
+		}
+		dev_info(dev, "max_xor_srcs %d\n", max_xor_srcs);
+	}
+
 	/* register crypto algorithms the device supports */
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		if (hw_supports(dev, driver_algs[i].desc_hdr_template)) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index ff5a145..b6197bc 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -155,6 +155,7 @@
 /* primary execution unit mode (MODE0) and derivatives */
 #define	DESC_HDR_MODE0_ENCRYPT		cpu_to_be32(0x00100000)
 #define	DESC_HDR_MODE0_AESU_CBC		cpu_to_be32(0x00200000)
+#define	DESC_HDR_MODE0_AESU_XOR		cpu_to_be32(0x0c600000)
 #define	DESC_HDR_MODE0_DEU_CBC		cpu_to_be32(0x00400000)
 #define	DESC_HDR_MODE0_DEU_3DES		cpu_to_be32(0x00200000)
 #define	DESC_HDR_MODE0_MDEU_INIT	cpu_to_be32(0x01000000)
@@ -202,6 +203,7 @@
 #define DESC_HDR_TYPE_IPSEC_ESP			cpu_to_be32(1 << 3)
 #define DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU	cpu_to_be32(2 << 3)
 #define DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU	cpu_to_be32(4 << 3)
+#define DESC_HDR_TYPE_RAID_XOR			cpu_to_be32(21 << 3)
 
 /* link table extent field bits */
 #define DESC_PTR_LNKTBL_JUMP			0x80
-- 
1.6.4.2

* Re: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
  2009-10-15  6:41 [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled Vishnu Suresh
  2009-10-15  6:41   ` Vishnu Suresh
@ 2009-10-16  1:25   ` Dan Williams
  1 sibling, 0 replies; 18+ messages in thread
From: Dan Williams @ 2009-10-16  1:25 UTC (permalink / raw)
  To: Vishnu Suresh
  Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, linux-crypto,
	Timur Tabi

[ added Leo and Timur to the Cc ]

On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> This patch disables the use of DMA_INTERRUPT capability with Async_tx
>
> The fsldma produces a null transfer with DMA_INTERRUPT
> capability when used with Async_tx. When RAID devices queue
> a transaction via Async_tx, this  results in a hang.
>
> Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
> ---
>  drivers/dma/fsldma.c |    6 ++++++
>  1 files changed, 6 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 296f9e7..66d9b39 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -1200,7 +1200,13 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
>                                                - fdev->reg.start + 1);
>
>        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
> +#ifndef CONFIG_ASYNC_CORE
> +       /*
> +        * The DMA_INTERRUPT async_tx is a NULL transfer, which will
> +        * triger a PE interrupt.
> +        */
>        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
> +#endif
>        dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
>        fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
>        fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;

You are basically saying that fsl_dma_prep_interrupt() is buggy.  Can
that routine be fixed rather than this piecemeal solution?  If it
cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt()
should just be disabled/deleted altogether.

--
Dan

* Re: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
  2009-10-16  1:25   ` Dan Williams
@ 2009-10-16 15:33     ` Ira W. Snyder
  -1 siblings, 0 replies; 18+ messages in thread
From: Ira W. Snyder @ 2009-10-16 15:33 UTC (permalink / raw)
  To: Dan Williams
  Cc: Vishnu Suresh, herbert, linux-kernel, linux-raid, linuxppc-dev,
	linux-crypto, Timur Tabi

On Thu, Oct 15, 2009 at 06:25:14PM -0700, Dan Williams wrote:
> [ added Leo and Timur to the Cc ]
> 
> On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> > This patch disables the use of DMA_INTERRUPT capability with Async_tx
> >
> > The fsldma produces a null transfer with DMA_INTERRUPT
> > capability when used with Async_tx. When RAID devices queue
> > a transaction via Async_tx, this  results in a hang.
> >
> > Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
> > ---
> >  drivers/dma/fsldma.c |    6 ++++++
> >  1 files changed, 6 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> > index 296f9e7..66d9b39 100644
> > --- a/drivers/dma/fsldma.c
> > +++ b/drivers/dma/fsldma.c
> > @@ -1200,7 +1200,13 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
> >                                                - fdev->reg.start + 1);
> >
> >        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
> > +#ifndef CONFIG_ASYNC_CORE
> > +       /*
> > +        * The DMA_INTERRUPT async_tx is a NULL transfer, which will
> > +        * triger a PE interrupt.
> > +        */
> >        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
> > +#endif
> >        dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
> >        fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
> >        fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
> 
> You are basically saying that fsl_dma_prep_interrupt() is buggy.  Can
> that routine be fixed rather than this piecemeal solution?  If it
> cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt()
> should just be disabled/deleted altogether.
> 

For what it's worth, I've used the following code in the recent past,
without any issues. This was on an 83xx, within the last few kernel
releases. I haven't tried it on the latest -rc.

Using device_prep_dma_memcpy() can trigger a callback as well, so the
interrupt feature isn't strictly needed. Just attach the callback to the
last memcpy operation.

static dma_cookie_t dma_async_interrupt(struct dma_chan *chan,
                                        dma_async_tx_callback callback,
                                        void *data)
{
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx; 

        /* Set up the DMA */
        tx = dev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
        if (!tx)
                return -ENOMEM;

        tx->callback = callback;
        tx->callback_param = data;

        return tx->tx_submit(tx);
}
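
A rough sketch of the memcpy-with-callback variant described above (hypothetical
helper name and arguments; assumes the caller already holds DMA-mapped source
and destination addresses):

static dma_cookie_t dma_async_memcpy_callback(struct dma_chan *chan,
                                        dma_addr_t dst, dma_addr_t src,
                                        size_t len,
                                        dma_async_tx_callback callback,
                                        void *data)
{
        struct dma_device *dev = chan->device;
        struct dma_async_tx_descriptor *tx;

        /* request the interrupt on completion of the copy itself,
         * instead of a separate DMA_INTERRUPT descriptor */
        tx = dev->device_prep_dma_memcpy(chan, dst, src, len,
                                         DMA_PREP_INTERRUPT);
        if (!tx)
                return -ENOMEM;

        tx->callback = callback;
        tx->callback_param = data;

        return tx->tx_submit(tx);
}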

Ira

* active/active raid0
  2009-10-16 15:33     ` Ira W. Snyder
  (?)
@ 2009-10-16 16:21     ` Matthew Ingersoll
  -1 siblings, 0 replies; 18+ messages in thread
From: Matthew Ingersoll @ 2009-10-16 16:21 UTC (permalink / raw)
  To: linux-raid

I'm testing an active/active setup that uses md for raid0 and was
hoping for some feedback.
The basic design has a storage device that runs vladed (aoe-storage)
and an aoe initiator (controller) that creates a raid0 using mdadm
from the aoe devices. From there, lvm (clustered/clvm) slices out
storage for sharing block devices. I don't want to concentrate on the
layers above that (clustered file systems, etc.), but mainly on the
active/active portion of this interacting with raid0 (via md, not lvm) and aoe.

Some diagrams that may help in understanding...
-----
"Physical" diagram:

(export network block devices [ iscsi, aoe, etc] )
                -----------
                /        \
       active  /          \  active
   +-------------+      +-------------+
   | controller1 |      | controller2 |
   +-------------+      +-------------+
              \           /
               +---------+
                    |
             +-------------+                    +-------------+
             | aoe-storage | --- drbd raid1 --- |  secondary  |
             +-------------+                    +-------------+

-----
Layer diagram:
+-----------------+
| (net block dev) | controller1/2
+-----------------+
| lvm2 clustered  | controller1/2
+-----------------+
| md: linux-raid0 | controller1/2
+-----------------+
|  aoe initiator  | controller1/2
+-----------------+
|   aoe target    | aoe-storage
+-----------------+


My main concern is the interaction with the buffer cache and the consistency
of the md devices between the controllers (probably not the page cache,
since these are block devices?). Will direct/O_DIRECT bypass this,
keeping the state of the data purely on the aoe-storage device? I
have a test setup of this running with no issues so far, connecting to
the controllers that run iscsi-target (iet) in blockio mode, thus
bypassing the buffer/page cache.

Any feedback is appreciated.

--
Matth


* RE: [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled
  2009-10-16 15:33     ` Ira W. Snyder
  (?)
@ 2009-10-20 13:09       ` Suresh Vishnu-B05022
  -1 siblings, 0 replies; 18+ messages in thread
From: Suresh Vishnu-B05022 @ 2009-10-20 13:09 UTC (permalink / raw)
  To: Ira W. Snyder, Dan Williams
  Cc: herbert, linux-kernel, linux-raid, linuxppc-dev, linux-crypto,
	Tabi Timur-B04825

> -----Original Message-----
> From: Ira W. Snyder [mailto:iws@ovro.caltech.edu] 
> Sent: Friday, October 16, 2009 9:04 PM
> To: Dan Williams
> Cc: Suresh Vishnu-B05022; herbert@gondor.apana.org.au; 
> linux-kernel@vger.kernel.org; linux-raid@vger.kernel.org; 
> linuxppc-dev@ozlabs.org; linux-crypto@vger.kernel.org; Tabi 
> Timur-B04825
> Subject: Re: [PATCH v0 1/2] DMA: fsldma: Disable 
> DMA_INTERRUPT when Async_tx enabled
> 
> On Thu, Oct 15, 2009 at 06:25:14PM -0700, Dan Williams wrote:
> > [ added Leo and Timur to the Cc ]
> > 
> > On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh 
> <Vishnu@freescale.com> wrote:
> > > This patch disables the use of DMA_INTERRUPT capability with 
> > > Async_tx
> > >
> > > The fsldma produces a null transfer with DMA_INTERRUPT capability 
> > > when used with Async_tx. When RAID devices queue a 
> transaction via 
> > > Async_tx, this  results in a hang.
> > >
> > > Signed-off-by: Vishnu Suresh <Vishnu@freescale.com>
> > > ---
> > >  drivers/dma/fsldma.c |    6 ++++++
> > >  1 files changed, 6 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 
> > > 296f9e7..66d9b39 100644
> > > --- a/drivers/dma/fsldma.c
> > > +++ b/drivers/dma/fsldma.c
> > > @@ -1200,7 +1200,13 @@ static int __devinit 
> of_fsl_dma_probe(struct 
> > > of_device *dev,
> > >                                                - 
> fdev->reg.start + 
> > > 1);
> > >
> > >        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
> > > +#ifndef CONFIG_ASYNC_CORE
> > > +       /*
> > > +        * The DMA_INTERRUPT async_tx is a NULL transfer, 
> which will
> > > +        * triger a PE interrupt.
> > > +        */
> > >        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
> > > +#endif
> > >        dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
> > >        fdev->common.device_alloc_chan_resources = 
> > > fsl_dma_alloc_chan_resources;
> > >        fdev->common.device_free_chan_resources = 
> > > fsl_dma_free_chan_resources;
> > 
> > You are basically saying that fsl_dma_prep_interrupt() is 
> buggy.  Can 
> > that routine be fixed rather than this piecemeal solution?  If it 
> > cannot be fixed (i.e. hardware issue) then fsl_dma_prep_interrupt() 
> > should just be disabled/deleted altogether.
We are working to fix this issue.
> > 
> 
> For what it's worth, I've used the following code in the 
> recent past, without any issues. This was on an 83xx, within 
> the last few kernel releases. I haven't tried it on the latest -rc.
This works fine as long as only DMA_MEMCPY is being used.
The async_tx_channel_switch does not occur and
device_prep_dma_interrupt is not called.
However, when a DMA_XOR-capable device is exposed
which is different from the DMA_MEMCPY/INTERRUPT device, this path is hit.

Is it proper to schedule a dma_interrupt from the channel switch call, 
even when the depend_tx and tx channels correspond to different devices?
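
For reference, the channel-switch path in question behaves roughly as in the
sketch below (a simplified paraphrase, not the exact async_tx code); the
interrupt descriptor is requested from the dependency's channel, which here is
the fsldma MEMCPY/INTERRUPT device rather than the XOR device:

/* simplified paraphrase of the channel-switch dependency handling */
static void channel_switch_sketch(struct dma_async_tx_descriptor *depend_tx,
                                  struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *chan = depend_tx->chan;   /* old channel, e.g. fsldma */
        struct dma_device *device = chan->device;
        struct dma_async_tx_descriptor *intr_tx = NULL;

        if (dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                intr_tx = device->device_prep_dma_interrupt(chan, 0);

        if (intr_tx) {
                /* chain depend_tx -> intr_tx on the old channel; intr_tx's
                 * completion callback then submits tx on the new channel */
        } else {
                /* otherwise fall back to polling depend_tx for completion */
        }
}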

> 
> Using device_prep_dma_memcpy() can trigger a callback as 
> well, so the interrupt feature isn't strictly needed. Just 
> attach the callback to the last memcpy operation.
> 
> static dma_cookie_t dma_async_interrupt(struct dma_chan *chan,
>                                         dma_async_tx_callback 
> callback,
>                                         void *data) {
>         struct dma_device *dev = chan->device;
>         struct dma_async_tx_descriptor *tx; 
> 
>         /* Set up the DMA */
>         tx = dev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT);
>         if (!tx)
>                 return -ENOMEM;
> 
>         tx->callback = callback;
>         tx->callback_param = data;
> 
>         return tx->tx_submit(tx);
> }
> 
> Ira
> 
> 

* Re: [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
  2009-10-15  6:41   ` Vishnu Suresh
  (?)
@ 2009-10-29 22:46     ` Dan Williams
  -1 siblings, 0 replies; 18+ messages in thread
From: Dan Williams @ 2009-10-29 22:46 UTC (permalink / raw)
  To: Vishnu Suresh
  Cc: linuxppc-dev, linux-crypto, linux-kernel, linux-raid, herbert,
	Kim Phillips, Dipen Dudhat, Maneesh Gupta

On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
[..]
> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
> index b08403d..343e578 100644
> --- a/drivers/crypto/Kconfig
> +++ b/drivers/crypto/Kconfig
> @@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
>        select CRYPTO_ALGAPI
>        select CRYPTO_AUTHENC
>        select HW_RANDOM
> +       select DMA_ENGINE
> +       select ASYNC_XOR

You need only select DMA_ENGINE.  ASYNC_XOR is selected by its users.
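
i.e. the Kconfig hunk would reduce to something like this (a sketch of the
suggested change, assuming nothing else in the driver needs ASYNC_XOR):

 config CRYPTO_DEV_TALITOS
 	select CRYPTO_ALGAPI
 	select CRYPTO_AUTHENC
 	select HW_RANDOM
+	select DMA_ENGINE
 	depends on FSL_SOC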

>        depends on FSL_SOC
>        help
>          Say 'Y' here to use the Freescale Security Engine (SEC)
> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
> index c47ffe8..84819d4 100644
> --- a/drivers/crypto/talitos.c
> +++ b/drivers/crypto/talitos.c
[..]
> +static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
> +{
> +       struct talitos_xor_desc *desc, *_desc;
> +       unsigned long flags;
> +       int status;
> +
> +       spin_lock_irqsave(&xor_chan->desc_lock, flags);
> +
> +       list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
> +               status = talitos_submit(xor_chan->dev, &desc->hwdesc,
> +                                       talitos_release_xor, desc);
> +               if (status != -EINPROGRESS)
> +                       break;
> +
> +               list_del(&desc->node);
> +               list_add_tail(&desc->node, &xor_chan->in_progress_q);
> +       }
> +
> +       spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
> +}

The driver uses spin_lock_bh everywhere else which is either a bug, or
this code is being overly protective.  In any event lockdep will
rightly complain about this.  The API and its users do not submit
operations in hard-irq context.
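
A minimal sketch of the same walk with the locking brought in line with the
rest of the driver, assuming nothing submits XOR operations from hard-irq
context (structures and talitos_submit() as in the patch above):

static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
{
	struct talitos_xor_desc *desc, *_desc;
	int status;

	spin_lock_bh(&xor_chan->desc_lock);

	list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
		status = talitos_submit(xor_chan->dev, &desc->hwdesc,
					talitos_release_xor, desc);
		if (status != -EINPROGRESS)
			break;

		/* descriptor is now owned by the hardware queue */
		list_move_tail(&desc->node, &xor_chan->in_progress_q);
	}

	spin_unlock_bh(&xor_chan->desc_lock);
}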

> +
> +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
> +                               void *context, int error)
> +{
> +       struct talitos_xor_desc *desc = context;
> +       struct talitos_xor_chan *xor_chan;
> +       dma_async_tx_callback callback;
> +       void *callback_param;
> +
> +       if (unlikely(error)) {
> +               dev_err(dev, "xor operation: talitos error %d\n", error);
> +               BUG();
> +       }
> +
> +       xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
> +                               common);
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (xor_chan->completed_cookie < desc->async_tx.cookie)
> +               xor_chan->completed_cookie = desc->async_tx.cookie;
> +
> +       callback = desc->async_tx.callback;
> +       callback_param = desc->async_tx.callback_param;
> +
> +       if (callback) {
> +               spin_unlock_bh(&xor_chan->desc_lock);
> +               callback(callback_param);
> +               spin_lock_bh(&xor_chan->desc_lock);
> +       }

You do not need to unlock to call the callback.  Users of the API
assume that they are called in bh context and that they are not
allowed to submit new operations directly from the callback (this
simplifies the descriptor cleanup routines in other drivers).

> +
> +       list_del(&desc->node);
> +       list_add_tail(&desc->node, &xor_chan->free_desc);
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->pending_q))
> +               talitos_process_pending(xor_chan);
> +}

It appears this routine is missing a call to dma_run_dependencies()?
This is needed if another channel is handling memory copy offload.  I
assume this is the case due to your other patch to fsldma.  See
iop_adma_run_tx_complete_actions().  If this xor resource can also
handle copy operations that would be more efficient as it eliminates
channel switching.  See the ioatdma driver and its use of
ASYNC_TX_DISABLE_CHANNEL_SWITCH.
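
Putting the last two comments together, a sketch of how the completion path
could look -- callback invoked without dropping the lock, and
dma_run_dependencies() added.  This is not the driver's actual code; it
assumes the structures and talitos_process_pending() from the patch above.

static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
				void *context, int error)
{
	struct talitos_xor_desc *desc = context;
	struct talitos_xor_chan *xor_chan;
	dma_async_tx_callback callback;

	if (unlikely(error)) {
		dev_err(dev, "xor operation: talitos error %d\n", error);
		BUG();
	}

	xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
				common);

	spin_lock_bh(&xor_chan->desc_lock);
	if (xor_chan->completed_cookie < desc->async_tx.cookie)
		xor_chan->completed_cookie = desc->async_tx.cookie;

	/* callers run in bh context and may not resubmit from the callback,
	 * so there is no need to drop the lock around it */
	callback = desc->async_tx.callback;
	if (callback)
		callback(desc->async_tx.callback_param);

	list_move_tail(&desc->node, &xor_chan->free_desc);
	spin_unlock_bh(&xor_chan->desc_lock);

	/* submit whatever was queued behind this operation, possibly on a
	 * different channel (e.g. the fsldma memcpy channel); real code must
	 * ensure the descriptor is not reused before this point */
	dma_run_dependencies(&desc->async_tx);

	if (!list_empty(&xor_chan->pending_q))
		talitos_process_pending(xor_chan);
}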

> +static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
> +                       struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
> +                       unsigned int src_cnt, size_t len, unsigned long flags)
> +{
> +       struct talitos_xor_chan *xor_chan;
> +       struct talitos_xor_desc *new;
> +       struct talitos_desc *desc;
> +       int i, j;
> +
> +       BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
> +
> +       xor_chan = container_of(chan, struct talitos_xor_chan, common);
> +
> +       spin_lock_bh(&xor_chan->desc_lock);
> +       if (!list_empty(&xor_chan->free_desc)) {
> +               new = container_of(xor_chan->free_desc.next,
> +                                  struct talitos_xor_desc, node);
> +               list_del(&new->node);
> +       } else {
> +               new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
> +       }
> +       spin_unlock_bh(&xor_chan->desc_lock);
> +
> +       if (!new) {
> +               dev_err(xor_chan->common.device->dev,
> +                       "No free memory for XOR DMA descriptor\n");
> +               return NULL;
> +       }
> +       dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
> +
> +       INIT_LIST_HEAD(&new->node);
> +       INIT_LIST_HEAD(&new->tx_list);

You can save some overhead in the fast path by moving this
initialization to talitos_xor_alloc_descriptor.
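
For example, a hypothetical shape for the allocator (its real body is not
quoted in this thread), with the one-time initialisation moved out of the
prep fast path:

static struct talitos_xor_desc *talitos_xor_alloc_descriptor(
		struct talitos_xor_chan *xor_chan, gfp_t flags)
{
	struct talitos_xor_desc *desc;

	desc = kzalloc(sizeof(*desc), flags);
	if (!desc)
		return NULL;

	/* one-time setup; the prep routine then only fills in the hw descriptor */
	dma_async_tx_descriptor_init(&desc->async_tx, &xor_chan->common);
	INIT_LIST_HEAD(&desc->node);
	INIT_LIST_HEAD(&desc->tx_list);

	return desc;
}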

> +
> +       desc = &new->hwdesc;
> +       /* Set destination: Last pointer pair */
> +       to_talitos_ptr(&desc->ptr[6], dest);
> +       desc->ptr[6].len = cpu_to_be16(len);
> +       desc->ptr[6].j_extent = 0;
> +
> +       /* Set Sources: End loading from second-last pointer pair */
> +       for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
> +               to_talitos_ptr(&desc->ptr[i], src[j]);
> +               desc->ptr[i].len = cpu_to_be16(len);
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       /*
> +        * documentation states first 0 ptr/len combo marks end of sources
> +        * yet device produces scatter boundary error unless all subsequent
> +        * sources are zeroed out
> +        */
> +       for (; i >= 0; i--) {
> +               to_talitos_ptr(&desc->ptr[i], 0);
> +               desc->ptr[i].len = 0;
> +               desc->ptr[i].j_extent = 0;
> +       }
> +
> +       desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
> +                   | DESC_HDR_TYPE_RAID_XOR;
> +
> +       new->async_tx.parent = NULL;
> +       new->async_tx.next = NULL;

These fields are managed by the async_tx channel switch code.  No need
to manage it here.

> +       new->async_tx.cookie = 0;

This is set below to -EBUSY, it's redundant to touch it here.

> +       async_tx_ack(&new->async_tx);

This makes it impossible to attach a dependency to this operation.
Not sure this is what you want.
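
The usual idiom in other dmaengine drivers is to record the caller's prep
flags and leave acking to the submitter, e.g.:

	/* honour the caller's prep flags instead of acking unconditionally;
	 * the submitter marks the descriptor reusable via async_tx_ack()
	 * (DMA_CTRL_ACK) once it no longer needs it */
	new->async_tx.flags = flags;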

--
Dan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload
  2009-10-29 22:46     ` Dan Williams
  (?)
@ 2009-11-03  0:43       ` hank peng
  -1 siblings, 0 replies; 18+ messages in thread
From: hank peng @ 2009-11-03  0:43 UTC (permalink / raw)
  To: Dan Williams
  Cc: Vishnu Suresh, linuxppc-dev, linux-crypto, linux-kernel,
	linux-raid, herbert, Kim Phillips, Dipen Dudhat, Maneesh Gupta

I have tested this patch on my MPC8548 box, kernel version
2.6.30, and there is a problem.
#mdadm -C /dev/md0 -l5 -n3 /dev/sd{a,b,c}
Recovery completes successfully and the interrupts look normal.
# cat /proc/interrupts
           CPU0
 16:   16091057   OpenPIC   Level     mvSata
 17:          0   OpenPIC   Level     mvSata
 18:          4   OpenPIC   Level     phy_interrupt, phy_interrupt
 20:         39   OpenPIC   Level     fsldma-channel
 21:          0   OpenPIC   Level     fsldma-channel
 22:          0   OpenPIC   Level     fsldma-channel
 23:          0   OpenPIC   Level     fsldma-channel
 29:        241   OpenPIC   Level     eth0_tx
 30:    1004692   OpenPIC   Level     eth0_rx
 34:          0   OpenPIC   Level     eth0_er
 35:          0   OpenPIC   Level     eth1_tx
 36:          0   OpenPIC   Level     eth1_rx
 40:          0   OpenPIC   Level     eth1_er
 42:      73060   OpenPIC   Level     serial
 43:       9854   OpenPIC   Level     i2c-mpc, i2c-mpc
 45:   61264188   OpenPIC   Level     talitos
BAD:          0

Then I tried to create a VG, and a problem occurred:
#pvcreate /dev/md0

After entering this command, the system hangs without any messages.

BTW, everything is OK before applying this patch.


2009/10/30 Dan Williams <dan.j.williams@intel.com>:
> On Wed, Oct 14, 2009 at 11:41 PM, Vishnu Suresh <Vishnu@freescale.com> wrote:
> [..]
>> diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
>> index b08403d..343e578 100644
>> --- a/drivers/crypto/Kconfig
>> +++ b/drivers/crypto/Kconfig
>> @@ -192,6 +192,8 @@ config CRYPTO_DEV_TALITOS
>>        select CRYPTO_ALGAPI
>>        select CRYPTO_AUTHENC
>>        select HW_RANDOM
>> +       select DMA_ENGINE
>> +       select ASYNC_XOR
>
> You need only select DMA_ENGINE.  ASYNC_XOR is selected by its users.
>
>>        depends on FSL_SOC
>>        help
>>          Say 'Y' here to use the Freescale Security Engine (SEC)
>> diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
>> index c47ffe8..84819d4 100644
>> --- a/drivers/crypto/talitos.c
>> +++ b/drivers/crypto/talitos.c
> [..]
>> +static void talitos_process_pending(struct talitos_xor_chan *xor_chan)
>> +{
>> +       struct talitos_xor_desc *desc, *_desc;
>> +       unsigned long flags;
>> +       int status;
>> +
>> +       spin_lock_irqsave(&xor_chan->desc_lock, flags);
>> +
>> +       list_for_each_entry_safe(desc, _desc, &xor_chan->pending_q, node) {
>> +               status = talitos_submit(xor_chan->dev, &desc->hwdesc,
>> +                                       talitos_release_xor, desc);
>> +               if (status != -EINPROGRESS)
>> +                       break;
>> +
>> +               list_del(&desc->node);
>> +               list_add_tail(&desc->node, &xor_chan->in_progress_q);
>> +       }
>> +
>> +       spin_unlock_irqrestore(&xor_chan->desc_lock, flags);
>> +}
>
> The driver uses spin_lock_bh everywhere else which is either a bug, or
> this code is being overly protective.  In any event lockdep will
> rightly complain about this.  The API and its users do not submit
> operations in hard-irq context.
>
>> +
>> +static void talitos_release_xor(struct device *dev, struct talitos_desc *hwdesc,
>> +                               void *context, int error)
>> +{
>> +       struct talitos_xor_desc *desc = context;
>> +       struct talitos_xor_chan *xor_chan;
>> +       dma_async_tx_callback callback;
>> +       void *callback_param;
>> +
>> +       if (unlikely(error)) {
>> +               dev_err(dev, "xor operation: talitos error %d\n", error);
>> +               BUG();
>> +       }
>> +
>> +       xor_chan = container_of(desc->async_tx.chan, struct talitos_xor_chan,
>> +                               common);
>> +       spin_lock_bh(&xor_chan->desc_lock);
>> +       if (xor_chan->completed_cookie < desc->async_tx.cookie)
>> +               xor_chan->completed_cookie = desc->async_tx.cookie;
>> +
>> +       callback = desc->async_tx.callback;
>> +       callback_param = desc->async_tx.callback_param;
>> +
>> +       if (callback) {
>> +               spin_unlock_bh(&xor_chan->desc_lock);
>> +               callback(callback_param);
>> +               spin_lock_bh(&xor_chan->desc_lock);
>> +       }
>
> You do not need to unlock to call the callback.  Users of the API
> assume that they are called in bh context and that they are not
> allowed to submit new operations directly from the callback (this
> simplifies the descriptor cleanup routines in other drivers).
>
>> +
>> +       list_del(&desc->node);
>> +       list_add_tail(&desc->node, &xor_chan->free_desc);
>> +       spin_unlock_bh(&xor_chan->desc_lock);
>> +       if (!list_empty(&xor_chan->pending_q))
>> +               talitos_process_pending(xor_chan);
>> +}
>
> It appears this routine is missing a call to dma_run_dependencies()?
> This is needed if another channel is handling memory copy offload.  I
> assume this is the case due to your other patch to fsldma.  See
> iop_adma_run_tx_complete_actions().  If this xor resource can also
> handle copy operations that would be more efficient as it eliminates
> channel switching.  See the ioatdma driver and its use of
> ASYNC_TX_DISABLE_CHANNEL_SWITCH.
>
>> +static struct dma_async_tx_descriptor * talitos_prep_dma_xor(
>> +                       struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
>> +                       unsigned int src_cnt, size_t len, unsigned long flags)
>> +{
>> +       struct talitos_xor_chan *xor_chan;
>> +       struct talitos_xor_desc *new;
>> +       struct talitos_desc *desc;
>> +       int i, j;
>> +
>> +       BUG_ON(unlikely(len > TALITOS_MAX_DATA_LEN));
>> +
>> +       xor_chan = container_of(chan, struct talitos_xor_chan, common);
>> +
>> +       spin_lock_bh(&xor_chan->desc_lock);
>> +       if (!list_empty(&xor_chan->free_desc)) {
>> +               new = container_of(xor_chan->free_desc.next,
>> +                                  struct talitos_xor_desc, node);
>> +               list_del(&new->node);
>> +       } else {
>> +               new = talitos_xor_alloc_descriptor(xor_chan, GFP_KERNEL);
>> +       }
>> +       spin_unlock_bh(&xor_chan->desc_lock);
>> +
>> +       if (!new) {
>> +               dev_err(xor_chan->common.device->dev,
>> +                       "No free memory for XOR DMA descriptor\n");
>> +               return NULL;
>> +       }
>> +       dma_async_tx_descriptor_init(&new->async_tx, &xor_chan->common);
>> +
>> +       INIT_LIST_HEAD(&new->node);
>> +       INIT_LIST_HEAD(&new->tx_list);
>
> You can save some overhead in the fast path by moving this
> initialization to talitos_xor_alloc_descriptor.
>
>> +
>> +       desc = &new->hwdesc;
>> +       /* Set destination: Last pointer pair */
>> +       to_talitos_ptr(&desc->ptr[6], dest);
>> +       desc->ptr[6].len = cpu_to_be16(len);
>> +       desc->ptr[6].j_extent = 0;
>> +
>> +       /* Set Sources: End loading from second-last pointer pair */
>> +       for (i = 5, j = 0; (j < src_cnt) && (i > 0); i--, j++) {
>> +               to_talitos_ptr(&desc->ptr[i], src[j]);
>> +               desc->ptr[i].len = cpu_to_be16(len);
>> +               desc->ptr[i].j_extent = 0;
>> +       }
>> +
>> +       /*
>> +        * documentation states first 0 ptr/len combo marks end of sources
>> +        * yet device produces scatter boundary error unless all subsequent
>> +        * sources are zeroed out
>> +        */
>> +       for (; i >= 0; i--) {
>> +               to_talitos_ptr(&desc->ptr[i], 0);
>> +               desc->ptr[i].len = 0;
>> +               desc->ptr[i].j_extent = 0;
>> +       }
>> +
>> +       desc->hdr = DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_XOR
>> +                   | DESC_HDR_TYPE_RAID_XOR;
>> +
>> +       new->async_tx.parent = NULL;
>> +       new->async_tx.next = NULL;
>
> These fields are managed by the async_tx channel switch code.  No need
> to manage it here.
>
>> +       new->async_tx.cookie = 0;
>
> This is set below to -EBUSY, it's redundant to touch it here.
>
>> +       async_tx_ack(&new->async_tx);
>
> This makes it impossible to attach a dependency to this operation.
> Not sure this is what you want.
>
> --
> Dan



-- 
The simplest is not all best but the best is surely the simplest!

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-11-03  0:43 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-15  6:41 [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled Vishnu Suresh
2009-10-15  6:41 ` [PATCH v0 2/2] Crypto: Talitos: Support for Async_tx XOR offload Vishnu Suresh
2009-10-15  6:41   ` Vishnu Suresh
2009-10-29 22:46   ` Dan Williams
2009-10-29 22:46     ` Dan Williams
2009-10-29 22:46     ` Dan Williams
2009-11-03  0:43     ` hank peng
2009-11-03  0:43       ` hank peng
2009-11-03  0:43       ` hank peng
2009-10-16  1:25 ` [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled Dan Williams
2009-10-16  1:25   ` Dan Williams
2009-10-16  1:25   ` Dan Williams
2009-10-16 15:33   ` Ira W. Snyder
2009-10-16 15:33     ` Ira W. Snyder
2009-10-16 16:21     ` active/active raid0 Matthew Ingersoll
2009-10-20 13:09     ` [PATCH v0 1/2] DMA: fsldma: Disable DMA_INTERRUPT when Async_tx enabled Suresh Vishnu-B05022
2009-10-20 13:09       ` Suresh Vishnu-B05022
2009-10-20 13:09       ` Suresh Vishnu-B05022
