* [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting
@ 2016-06-28 23:15 Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 1/5] dmaengine: Adding error handling flag Dave Jiang
                   ` (5 more replies)
  0 siblings, 6 replies; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Changes v2 -> v3:
- Fix up commit header that was missing in patch 2/5.

Changes v1 -> v2: 
- Addressed comments from Allen Hubbe
- Added CPU resubmit for aborted/failed NTB submissions

The following series implements error reporting and handling for ioatdma.
An error flag field has been added to the generic DMA descriptor to return
DMA error status, so that the upper layer can be notified when errors occur.
The ioatdma driver will now abort the remaining outstanding descriptors when
an error occurs that requires a channel reset. The Intel NTB driver has been
modified to handle the error condition from the DMA layer.
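
As a rough illustration of the intended client-side usage (not part of this
series; the entry structure and the helpers below are hypothetical), an
upper layer would save the descriptor at submit time and check the new
result flag from its completion callback:

	/*
	 * Hypothetical client completion callback; "entry" carries the
	 * descriptor pointer saved at submit time, as the NTB patches in
	 * this series do.
	 */
	static void client_dma_callback(void *data)
	{
		struct client_entry *entry = data;
		struct dma_async_tx_descriptor *txd = entry->txd;

		switch (txd->result) {
		case ERR_DMA_READ:
		case ERR_DMA_WRITE:
		case ERR_DMA_ABORT:
			/* offload failed or was aborted; redo via CPU */
			client_cpu_resubmit(entry);
			return;
		case ERR_DMA_NONE:
		default:
			break;
		}

		client_complete(entry);
	}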

---

Dave Jiang (5):
      dmaengine: Adding error handling flag
      dmaengine: ioatdma: Add error handling to ioat driver
      dmaengine: ioatdma: add error strings to chanerr output
      ntb: add DMA error handling for TX DMA
      ntb: add DMA error handling for RX DMA


 drivers/dma/ioat/dma.c       |  201 +++++++++++++++++++++++++++++++++++++-----
 drivers/dma/ioat/registers.h |    2 
 2 files changed, 181 insertions(+), 22 deletions(-)

--


* [PATCH v3 1/5] dmaengine: Adding error handling flag
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
@ 2016-06-28 23:15 ` Dave Jiang
  2016-07-08  5:29   ` Vinod Koul
  2016-06-28 23:15 ` [PATCH v3 2/5] dmaengine: ioatdma: Add error handling to ioat driver Dave Jiang
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Add an error flag to the callback descriptor to notify the upper layer that
an error has occurred with this particular DMA op.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 0 files changed

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0174337..6524881 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -453,6 +453,20 @@ struct dmaengine_unmap_data {
 };
 
 /**
+ * enum err_result_flags - result of DMA operations
+ * @ERR_DMA_NONE - no errors
+ * @ERR_DMA_READ - DMA read error
+ * @ERR_DMA_WRITE - DMA write error
+ * @ERR_DMA_ABORT - Operation aborted
+ */
+enum err_result_flags {
+	ERR_DMA_NONE = 0,
+	ERR_DMA_READ,
+	ERR_DMA_WRITE,
+	ERR_DMA_ABORT,
+};
+
+/**
  * struct dma_async_tx_descriptor - async transaction descriptor
  * ---dma generic offload fields---
  * @cookie: tracking cookie for this transaction, set to -EBUSY if
@@ -480,6 +494,7 @@ struct dma_async_tx_descriptor {
 	dma_async_tx_callback callback;
 	void *callback_param;
 	struct dmaengine_unmap_data *unmap;
+	enum err_result_flags result;
 #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	struct dma_async_tx_descriptor *next;
 	struct dma_async_tx_descriptor *parent;


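A minimal sketch of the provider-side contract implied by the new field
(the function name is hypothetical; the body mirrors the completion pattern
the ioatdma patches later in this series use): the driver records the
outcome in the descriptor before invoking the client callback.

	static void provider_complete_desc(struct dma_async_tx_descriptor *tx,
					   enum err_result_flags res)
	{
		tx->result = res;	/* ERR_DMA_NONE on success */
		if (tx->cookie) {
			dma_cookie_complete(tx);
			dma_descriptor_unmap(tx);
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}
	}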

* [PATCH v3 2/5] dmaengine: ioatdma: Add error handling to ioat driver
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 1/5] dmaengine: Adding error handling flag Dave Jiang
@ 2016-06-28 23:15 ` Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 3/5] dmaengine: ioatdma: add error strings to chanerr output Dave Jiang
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Add error handling to the ioatdma driver so that when a read/write error
occurs, the error result is reported back and all remaining descriptors
are aborted.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/dma/ioat/dma.c       |  136 ++++++++++++++++++++++++++++++++++++------
 drivers/dma/ioat/registers.h |    2 +
 2 files changed, 120 insertions(+), 18 deletions(-)

diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index bd09961..899f9e1 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -622,7 +622,8 @@ static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
 	if (is_ioat_halted(*ioat_chan->completion)) {
 		u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 
-		if (chanerr & IOAT_CHANERR_HANDLE_MASK) {
+		if (chanerr &
+		    (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
 			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
 			ioat_eh(ioat_chan);
 		}
@@ -652,6 +653,60 @@ static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
 	__ioat_restart_chan(ioat_chan);
 }
 
+
+static void ioat_abort_descs(struct ioatdma_chan *ioat_chan)
+{
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *desc;
+	u16 active;
+	int idx = ioat_chan->tail, i;
+
+	/*
+	 * We assume that the failed descriptor has been processed.
+	 * Now we just complete all of the remaining submitted
+	 * descriptors, marking them as aborted.
+	 */
+	active = ioat_ring_active(ioat_chan);
+
+	/* we skip the failed descriptor that tail points to */
+	for (i = 1; i < active; i++) {
+		struct dma_async_tx_descriptor *tx;
+
+		smp_read_barrier_depends();
+		prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		desc->txd.result = ERR_DMA_ABORT;
+
+		tx = &desc->txd;
+		if (tx->cookie) {
+			dma_cookie_complete(tx);
+			dma_descriptor_unmap(tx);
+			if (tx->callback) {
+				tx->callback(tx->callback_param);
+				tx->callback = NULL;
+			}
+		}
+
+		/* skip extended descriptors */
+		if (desc_has_ext(desc)) {
+			WARN_ON(i + 1 >= active);
+			i++;
+		}
+
+		/* cleanup super extended descriptors */
+		if (desc->sed) {
+			ioat_free_sed(ioat_dma, desc->sed);
+			desc->sed = NULL;
+		}
+	}
+
+	smp_mb(); /* finish all descriptor reads before incrementing tail */
+	ioat_chan->tail = idx + active;
+
+	desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+	ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys;
+}
+
 static void ioat_eh(struct ioatdma_chan *ioat_chan)
 {
 	struct pci_dev *pdev = to_pdev(ioat_chan);
@@ -662,6 +717,7 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan)
 	u32 err_handled = 0;
 	u32 chanerr_int;
 	u32 chanerr;
+	bool abort = false;
 
 	/* cleanup so tail points to descriptor that caused the error */
 	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
@@ -697,30 +753,50 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan)
 		break;
 	}
 
+	if (chanerr & IOAT_CHANERR_RECOVER_MASK) {
+		if (chanerr & IOAT_CHANERR_READ_DATA_ERR) {
+			desc->txd.result = ERR_DMA_READ;
+			err_handled |= IOAT_CHANERR_READ_DATA_ERR;
+		} else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) {
+			desc->txd.result = ERR_DMA_WRITE;
+			err_handled |= IOAT_CHANERR_WRITE_DATA_ERR;
+		}
+
+		abort = true;
+	}
+
 	/* fault on unhandled error or spurious halt */
 	if (chanerr ^ err_handled || chanerr == 0) {
 		dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
 			__func__, chanerr, err_handled);
 		BUG();
-	} else { /* cleanup the faulty descriptor */
-		tx = &desc->txd;
-		if (tx->cookie) {
-			dma_cookie_complete(tx);
-			dma_descriptor_unmap(tx);
-			if (tx->callback) {
-				tx->callback(tx->callback_param);
-				tx->callback = NULL;
-			}
-		}
 	}
 
-	writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+	/* cleanup the faulty descriptor since we are continuing */
+	tx = &desc->txd;
+	if (tx->cookie) {
+		dma_cookie_complete(tx);
+		dma_descriptor_unmap(tx);
+		if (tx->callback) {
+			tx->callback(tx->callback_param);
+			tx->callback = NULL;
+		}
+	}
 
 	/* mark faulting descriptor as complete */
 	*ioat_chan->completion = desc->txd.phys;
 
 	spin_lock_bh(&ioat_chan->prep_lock);
+	/* we need to abort all descriptors */
+	if (abort) {
+		ioat_abort_descs(ioat_chan);
+		/* clean up the channel, we could be in weird state */
+		ioat_reset_hw(ioat_chan);
+	}
+
+	writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
 	ioat_restart_channel(ioat_chan);
 	spin_unlock_bh(&ioat_chan->prep_lock);
 }
@@ -753,10 +829,25 @@ void ioat_timer_event(unsigned long data)
 		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 		dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
 			__func__, chanerr);
-		if (test_bit(IOAT_RUN, &ioat_chan->state))
-			BUG_ON(is_ioat_bug(chanerr));
-		else /* we never got off the ground */
-			return;
+		if (test_bit(IOAT_RUN, &ioat_chan->state)) {
+			spin_lock_bh(&ioat_chan->cleanup_lock);
+			spin_lock_bh(&ioat_chan->prep_lock);
+			set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+			spin_unlock_bh(&ioat_chan->prep_lock);
+
+			ioat_abort_descs(ioat_chan);
+			dev_warn(to_dev(ioat_chan), "Reset channel...\n");
+			ioat_reset_hw(ioat_chan);
+			dev_warn(to_dev(ioat_chan), "Restart channel...\n");
+			ioat_restart_channel(ioat_chan);
+
+			spin_lock_bh(&ioat_chan->prep_lock);
+			clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+			spin_unlock_bh(&ioat_chan->prep_lock);
+			spin_unlock_bh(&ioat_chan->cleanup_lock);
+		}
+
+		return;
 	}
 
 	spin_lock_bh(&ioat_chan->cleanup_lock);
@@ -780,14 +871,23 @@ void ioat_timer_event(unsigned long data)
 		u32 chanerr;
 
 		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-		dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
 		dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
 			 status, chanerr);
 		dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
 			 ioat_ring_active(ioat_chan));
 
 		spin_lock_bh(&ioat_chan->prep_lock);
+		set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+
+		ioat_abort_descs(ioat_chan);
+		dev_warn(to_dev(ioat_chan), "Resetting channel...\n");
+		ioat_reset_hw(ioat_chan);
+		dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
 		ioat_restart_channel(ioat_chan);
+
+		spin_lock_bh(&ioat_chan->prep_lock);
+		clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
 		spin_unlock_bh(&ioat_chan->prep_lock);
 		spin_unlock_bh(&ioat_chan->cleanup_lock);
 		return;
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 4994a36..faf20fa 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -233,6 +233,8 @@
 #define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
 
 #define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \
+				   IOAT_CHANERR_WRITE_DATA_ERR)
 
 #define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
 


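To make the ring accounting in ioat_abort_descs() concrete (indices are
illustrative): if tail sits at slot 10 and four descriptors are active,
slot 10 holds the faulting descriptor that ioat_eh() has already completed,
slots 11 through 13 are completed with txd.result set to ERR_DMA_ABORT, and
tail then advances to slot 14, with the channel's completion address pointed
at the new tail entry.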

* [PATCH v3 3/5] dmaengine: ioatdma: add error strings to chanerr output
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 1/5] dmaengine: Adding error handling flag Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 2/5] dmaengine: ioatdma: Add error handling to ioat driver Dave Jiang
@ 2016-06-28 23:15 ` Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 4/5] ntb: add DMA error handling for TX DMA Dave Jiang
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Provide a mechanism to translate CHANERR bits to human-readable strings so
that users can report errors more meaningfully.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/dma/ioat/dma.c |   65 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 899f9e1..007169a 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -38,8 +38,54 @@
 
 #include "../dmaengine.h"
 
+static char *chanerr_str[] = {
+	"DMA Transfer Destination Address Error",
+	"Next Descriptor Address Error",
+	"Descriptor Error",
+	"Chan Address Value Error",
+	"CHANCMD Error",
+	"Chipset Uncorrectable Data Integrity Error",
+	"DMA Uncorrectable Data Integrity Error",
+	"Read Data Error",
+	"Write Data Error",
+	"Descriptor Control Error",
+	"Descriptor Transfer Size Error",
+	"Completion Address Error",
+	"Interrupt Configuration Error",
+	"Super extended descriptor Address Error",
+	"Unaffiliated Error",
+	"CRC or XOR P Error",
+	"XOR Q Error",
+	"Descriptor Count Error",
+	"DIF All F detect Error",
+	"Guard Tag verification Error",
+	"Application Tag verification Error",
+	"Reference Tag verification Error",
+	"Bundle Bit Error",
+	"Result DIF All F detect Error",
+	"Result Guard Tag verification Error",
+	"Result Application Tag verification Error",
+	"Result Reference Tag verification Error",
+	NULL
+};
+
 static void ioat_eh(struct ioatdma_chan *ioat_chan);
 
+static void ioat_print_chanerrs(struct ioatdma_chan *ioat_chan, u32 chanerr)
+{
+	int i;
+
+	for (i = 0; i < 32; i++) {
+		if ((chanerr >> i) & 1) {
+			if (chanerr_str[i]) {
+				dev_err(to_dev(ioat_chan), "Err(%d): %s\n",
+					i, chanerr_str[i]);
+			} else
+				break;
+		}
+	}
+}
+
 /**
  * ioat_dma_do_interrupt - handler used for single vector interrupt mode
  * @irq: interrupt id
@@ -769,6 +815,11 @@ static void ioat_eh(struct ioatdma_chan *ioat_chan)
 	if (chanerr ^ err_handled || chanerr == 0) {
 		dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
 			__func__, chanerr, err_handled);
+		dev_err(to_dev(ioat_chan), "Errors handled:\n");
+		ioat_print_chanerrs(ioat_chan, err_handled);
+		dev_err(to_dev(ioat_chan), "Errors not handled:\n");
+		ioat_print_chanerrs(ioat_chan, (chanerr & ~err_handled));
+
 		BUG();
 	}
 
@@ -829,6 +880,9 @@ void ioat_timer_event(unsigned long data)
 		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 		dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
 			__func__, chanerr);
+		dev_err(to_dev(ioat_chan), "Errors:\n");
+		ioat_print_chanerrs(ioat_chan, chanerr);
+
 		if (test_bit(IOAT_RUN, &ioat_chan->state)) {
 			spin_lock_bh(&ioat_chan->cleanup_lock);
 			spin_lock_bh(&ioat_chan->prep_lock);
@@ -871,10 +925,13 @@ void ioat_timer_event(unsigned long data)
 		u32 chanerr;
 
 		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-		dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
-			 status, chanerr);
-		dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
-			 ioat_ring_active(ioat_chan));
+		dev_err(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+			status, chanerr);
+		dev_err(to_dev(ioat_chan), "Errors:\n");
+		ioat_print_chanerrs(ioat_chan, chanerr);
+
+		dev_dbg(to_dev(ioat_chan), "Active descriptors: %d\n",
+			ioat_ring_active(ioat_chan));
 
 		spin_lock_bh(&ioat_chan->prep_lock);
 		set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);


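As an illustration of the resulting output (the dev_err() device prefix is
elided here), a channel halt with CHANERR == 0x180, i.e. bits 7 and 8 set,
would be decoded by ioat_print_chanerrs() as:

	Err(7): Read Data Error
	Err(8): Write Data Error

Strictly, the loop runs i up to 31 while chanerr_str holds 27 strings plus
the NULL sentinel, so a set bit above 27 with bit 27 clear would index past
the array; no such CHANERR bits appear to be defined, though.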

* [PATCH v3 4/5] ntb: add DMA error handling for TX DMA
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
                   ` (2 preceding siblings ...)
  2016-06-28 23:15 ` [PATCH v3 3/5] dmaengine: ioatdma: add error strings to chanerr output Dave Jiang
@ 2016-06-28 23:15 ` Dave Jiang
  2016-06-28 23:15 ` [PATCH v3 5/5] ntb: add DMA error handling for RX DMA Dave Jiang
  2016-06-29 13:24 ` [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Allen Hubbe
  5 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Add support on the TX DMA path to allow recovery from errors when the DMA
engine responds with an error status and aborts all subsequent ops.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 0 files changed

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 2ef9d913..bdb2001 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -102,6 +102,10 @@ struct ntb_queue_entry {
 	void *buf;
 	unsigned int len;
 	unsigned int flags;
+	struct dma_async_tx_descriptor *txd;
+	int retries;
+	int errors;
+	unsigned int tx_index;
 
 	struct ntb_transport_qp *qp;
 	union {
@@ -258,6 +262,9 @@ enum {
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
 static struct ntb_client ntb_transport_client;
+static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
+			       struct ntb_queue_entry *entry);
+static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset);
 
 static int ntb_transport_bus_match(struct device *dev,
 				   struct device_driver *drv)
@@ -1444,6 +1451,33 @@ static void ntb_tx_copy_callback(void *data)
 	struct ntb_queue_entry *entry = data;
 	struct ntb_transport_qp *qp = entry->qp;
 	struct ntb_payload_header __iomem *hdr = entry->tx_hdr;
+	struct dma_async_tx_descriptor *txd;
+
+	txd = entry->txd;
+
+	/* we need to check DMA results if we are using DMA */
+	if (txd) {
+		switch (txd->result) {
+		case ERR_DMA_READ:
+		case ERR_DMA_WRITE:
+			entry->errors++;
+		case ERR_DMA_ABORT:
+		{
+			void __iomem *offset =
+				qp->tx_mw + qp->tx_max_frame *
+				entry->tx_index;
+
+			/* resubmit via CPU */
+			ntb_memcpy_tx(entry, offset);
+			qp->tx_memcpy++;
+			return;
+		}
+
+		case ERR_DMA_NONE:
+		default:
+			break;
+		}
+	}
 
 	iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
 
@@ -1482,37 +1516,21 @@ static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
 	ntb_tx_copy_callback(entry);
 }
 
-static void ntb_async_tx(struct ntb_transport_qp *qp,
-			 struct ntb_queue_entry *entry)
+static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
+			       struct ntb_queue_entry *entry)
 {
-	struct ntb_payload_header __iomem *hdr;
-	struct dma_async_tx_descriptor *txd;
 	struct dma_chan *chan = qp->tx_dma_chan;
 	struct dma_device *device;
+	size_t len = entry->len;
+	void *buf = entry->buf;
 	size_t dest_off, buff_off;
 	struct dmaengine_unmap_data *unmap;
 	dma_addr_t dest;
 	dma_cookie_t cookie;
-	void __iomem *offset;
-	size_t len = entry->len;
-	void *buf = entry->buf;
 	int retries = 0;
 
-	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
-	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
-	entry->tx_hdr = hdr;
-
-	iowrite32(entry->len, &hdr->len);
-	iowrite32((u32)qp->tx_pkts, &hdr->ver);
-
-	if (!chan)
-		goto err;
-
-	if (len < copy_bytes)
-		goto err;
-
 	device = chan->device;
-	dest = qp->tx_mw_phys + qp->tx_max_frame * qp->tx_index;
+	dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
 	buff_off = (size_t)buf & ~PAGE_MASK;
 	dest_off = (size_t)dest & ~PAGE_MASK;
 
@@ -1532,39 +1550,74 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 	unmap->to_cnt = 1;
 
 	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
-						     len, DMA_PREP_INTERRUPT);
-		if (txd)
+		entry->txd = device->device_prep_dma_memcpy(chan, dest,
+							    unmap->addr[0], len,
+							    DMA_PREP_INTERRUPT);
+		if (entry->txd)
 			break;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(DMA_OUT_RESOURCE_TO);
 	}
 
-	if (!txd) {
+	if (!entry->txd) {
 		qp->dma_tx_prep_err++;
 		goto err_get_unmap;
 	}
 
-	txd->callback = ntb_tx_copy_callback;
-	txd->callback_param = entry;
-	dma_set_unmap(txd, unmap);
+	entry->txd->callback = ntb_tx_copy_callback;
+	entry->txd->callback_param = entry;
+	dma_set_unmap(entry->txd, unmap);
 
-	cookie = dmaengine_submit(txd);
+	cookie = dmaengine_submit(entry->txd);
 	if (dma_submit_error(cookie))
 		goto err_set_unmap;
 
 	dmaengine_unmap_put(unmap);
 
 	dma_async_issue_pending(chan);
-	qp->tx_async++;
 
-	return;
+	return 0;
 err_set_unmap:
 	dmaengine_unmap_put(unmap);
 err_get_unmap:
 	dmaengine_unmap_put(unmap);
 err:
+	return -ENXIO;
+}
+
+static void ntb_async_tx(struct ntb_transport_qp *qp,
+			 struct ntb_queue_entry *entry)
+{
+	struct ntb_payload_header __iomem *hdr;
+	struct dma_chan *chan = qp->tx_dma_chan;
+	void __iomem *offset;
+	int res;
+
+	entry->tx_index = qp->tx_index;
+	offset = qp->tx_mw + qp->tx_max_frame * entry->tx_index;
+	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
+	entry->tx_hdr = hdr;
+
+	iowrite32(entry->len, &hdr->len);
+	iowrite32((u32)qp->tx_pkts, &hdr->ver);
+
+	if (!chan)
+		goto err;
+
+	if (entry->len < copy_bytes)
+		goto err;
+
+	res = ntb_async_tx_submit(qp, entry);
+	if (res < 0)
+		goto err;
+
+	if (!entry->retries)
+		qp->tx_async++;
+
+	return;
+
+err:
 	ntb_memcpy_tx(entry, offset);
 	qp->tx_memcpy++;
 }
@@ -1940,6 +1993,10 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
 	entry->buf = data;
 	entry->len = len;
 	entry->flags = 0;
+	entry->errors = 0;
+	entry->retries = 0;
+	entry->tx_index = 0;
+	entry->txd = NULL;
 
 	rc = ntb_process_tx(qp, entry);
 	if (rc)


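Note the deliberate fallthrough in the switch in ntb_tx_copy_callback():
ERR_DMA_READ and ERR_DMA_WRITE first bump entry->errors and then fall
through into the ERR_DMA_ABORT case, so all three outcomes are resubmitted
via ntb_memcpy_tx(); only the error counter distinguishes them.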

* [PATCH v3 5/5] ntb: add DMA error handling for RX DMA
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
                   ` (3 preceding siblings ...)
  2016-06-28 23:15 ` [PATCH v3 4/5] ntb: add DMA error handling for TX DMA Dave Jiang
@ 2016-06-28 23:15 ` Dave Jiang
  2016-06-29 13:24 ` [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Allen Hubbe
  5 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2016-06-28 23:15 UTC (permalink / raw)
  To: vinod.koul, allen.hubbe, jdmason; +Cc: dmaengine, linux-ntb, dan.j.williams

Add support on the RX DMA path to allow recovery from errors when the DMA
engine responds with an error status and aborts all subsequent ops.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 0 files changed

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index bdb2001..5754cab 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -106,13 +106,13 @@ struct ntb_queue_entry {
 	int retries;
 	int errors;
 	unsigned int tx_index;
+	unsigned int rx_index;
 
 	struct ntb_transport_qp *qp;
 	union {
 		struct ntb_payload_header __iomem *tx_hdr;
 		struct ntb_payload_header *rx_hdr;
 	};
-	unsigned int index;
 };
 
 struct ntb_rx_info {
@@ -265,6 +265,9 @@ static struct ntb_client ntb_transport_client;
 static int ntb_async_tx_submit(struct ntb_transport_qp *qp,
 			       struct ntb_queue_entry *entry);
 static void ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset);
+static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset);
+static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset);
+
 
 static int ntb_transport_bus_match(struct device *dev,
 				   struct device_driver *drv)
@@ -1208,7 +1211,7 @@ static void ntb_complete_rxc(struct ntb_transport_qp *qp)
 			break;
 
 		entry->rx_hdr->flags = 0;
-		iowrite32(entry->index, &qp->rx_info->entry);
+		iowrite32(entry->rx_index, &qp->rx_info->entry);
 
 		cb_data = entry->cb_data;
 		len = entry->len;
@@ -1229,6 +1232,31 @@ static void ntb_complete_rxc(struct ntb_transport_qp *qp)
 static void ntb_rx_copy_callback(void *data)
 {
 	struct ntb_queue_entry *entry = data;
+	struct dma_async_tx_descriptor *txd;
+
+	txd = entry->txd;
+
+	/* we need to check DMA results if we are using DMA */
+	if (txd) {
+		switch (txd->result) {
+		case ERR_DMA_READ:
+		case ERR_DMA_WRITE:
+			entry->errors++;
+		case ERR_DMA_ABORT:
+		{
+			struct ntb_transport_qp *qp = entry->qp;
+			void *offset = qp->rx_buff + qp->rx_max_frame *
+					qp->rx_index;
+
+			ntb_memcpy_rx(entry, offset);
+			qp->rx_memcpy++;
+			return;
+		}
+		case ERR_DMA_NONE:
+		default:
+			break;
+		}
+	}
 
 	entry->flags |= DESC_DONE_FLAG;
 
@@ -1248,9 +1276,8 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset)
 	ntb_rx_copy_callback(entry);
 }
 
-static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
+static int ntb_async_rx_submit(struct ntb_queue_entry *entry, void *offset)
 {
-	struct dma_async_tx_descriptor *txd;
 	struct ntb_transport_qp *qp = entry->qp;
 	struct dma_chan *chan = qp->rx_dma_chan;
 	struct dma_device *device;
@@ -1261,13 +1288,6 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 	int retries = 0;
 
 	len = entry->len;
-
-	if (!chan)
-		goto err;
-
-	if (len < copy_bytes)
-		goto err;
-
 	device = chan->device;
 	pay_off = (size_t)offset & ~PAGE_MASK;
 	buff_off = (size_t)buf & ~PAGE_MASK;
@@ -1295,26 +1315,27 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 	unmap->from_cnt = 1;
 
 	for (retries = 0; retries < DMA_RETRIES; retries++) {
-		txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-						     unmap->addr[0], len,
-						     DMA_PREP_INTERRUPT);
-		if (txd)
+		entry->txd = device->device_prep_dma_memcpy(chan,
+							    unmap->addr[1],
+							    unmap->addr[0], len,
+							    DMA_PREP_INTERRUPT);
+		if (entry->txd)
 			break;
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(DMA_OUT_RESOURCE_TO);
 	}
 
-	if (!txd) {
+	if (!entry->txd) {
 		qp->dma_rx_prep_err++;
 		goto err_get_unmap;
 	}
 
-	txd->callback = ntb_rx_copy_callback;
-	txd->callback_param = entry;
-	dma_set_unmap(txd, unmap);
+	entry->txd->callback = ntb_rx_copy_callback;
+	entry->txd->callback_param = entry;
+	dma_set_unmap(entry->txd, unmap);
 
-	cookie = dmaengine_submit(txd);
+	cookie = dmaengine_submit(entry->txd);
 	if (dma_submit_error(cookie))
 		goto err_set_unmap;
 
@@ -1324,13 +1345,38 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 
 	qp->rx_async++;
 
-	return;
+	return 0;
 
 err_set_unmap:
 	dmaengine_unmap_put(unmap);
 err_get_unmap:
 	dmaengine_unmap_put(unmap);
 err:
+	return -ENXIO;
+}
+
+static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
+{
+	struct ntb_transport_qp *qp = entry->qp;
+	struct dma_chan *chan = qp->rx_dma_chan;
+	int res;
+
+	if (!chan)
+		goto err;
+
+	if (entry->len < copy_bytes)
+		goto err;
+
+	res = ntb_async_rx_submit(entry, offset);
+	if (res < 0)
+		goto err;
+
+	if (!entry->retries)
+		qp->rx_async++;
+
+	return;
+
+err:
 	ntb_memcpy_rx(entry, offset);
 	qp->rx_memcpy++;
 }
@@ -1376,7 +1422,7 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp)
 	}
 
 	entry->rx_hdr = hdr;
-	entry->index = qp->rx_index;
+	entry->rx_index = qp->rx_index;
 
 	if (hdr->len > entry->len) {
 		dev_dbg(&qp->ndev->pdev->dev,
@@ -1951,6 +1997,10 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
 	entry->buf = data;
 	entry->len = len;
 	entry->flags = 0;
+	entry->retries = 0;
+	entry->errors = 0;
+	entry->rx_index = 0;
+	entry->txd = NULL;
 
 	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
 


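The RX callback mirrors the TX fallthrough, with one subtlety worth noting:
the CPU resubmit here computes its offset from qp->rx_index at callback
time, whereas the TX side uses the tx_index saved in the entry, so the two
can differ if rx_index has advanced since the descriptor was submitted.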

* RE: [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting
  2016-06-28 23:15 [PATCH v3 0/5] dmaengine: ioatdma: DMA error reporting Dave Jiang
                   ` (4 preceding siblings ...)
  2016-06-28 23:15 ` [PATCH v3 5/5] ntb: add DMA error handling for RX DMA Dave Jiang
@ 2016-06-29 13:24 ` Allen Hubbe
  5 siblings, 0 replies; 9+ messages in thread
From: Allen Hubbe @ 2016-06-29 13:24 UTC (permalink / raw)
  To: 'Dave Jiang', vinod.koul, jdmason
  Cc: dmaengine, linux-ntb, dan.j.williams

From: Dave Jiang
> Changes v2 -> v3:
> - Fix up commit header that was missing in patch 2/5.
> 
> Changes v1 -> v2:
> - Addressed comments from Allen Hubbe
> - Added CPU resubmit for aborted/failed NTB submissions
> 
> The following series implements error reporting and handling for ioatdma.
> An error flag field has been added to the generic DMA descriptor to return
> DMA error status, so that the upper layer can be notified when errors occur.
> The ioatdma driver will now abort the remaining outstanding descriptors when
> an error occurs that requires a channel reset. The Intel NTB driver has been
> modified to handle the error condition from the DMA layer.
> 
> ---
> 
> Dave Jiang (5):
>       dmaengine: Adding error handling flag
>       dmaengine: ioatdma: Add error handling to ioat driver
>       dmaengine: ioatdma: add error strings to chanerr output
>       ntb: add DMA error handling for TX DMA
>       ntb: add DMA error handling for RX DMA
> 

Acked-by: Allen Hubbe <Allen.Hubbe@emc.com>

> 
>  drivers/dma/ioat/dma.c       |  201 +++++++++++++++++++++++++++++++++++++-----
>  drivers/dma/ioat/registers.h |    2
>  2 files changed, 181 insertions(+), 22 deletions(-)
> 
> --



* Re: [PATCH v3 1/5] dmaengine: Adding error handling flag
  2016-06-28 23:15 ` [PATCH v3 1/5] dmaengine: Adding error handling flag Dave Jiang
@ 2016-07-08  5:29   ` Vinod Koul
  2016-07-08 16:00     ` Jiang, Dave
  0 siblings, 1 reply; 9+ messages in thread
From: Vinod Koul @ 2016-07-08  5:29 UTC (permalink / raw)
  To: Dave Jiang; +Cc: allen.hubbe, jdmason, dmaengine, linux-ntb, dan.j.williams

On Tue, Jun 28, 2016 at 04:15:11PM -0700, Dave Jiang wrote:
> Add an error flag to the callback descriptor to notify the upper layer that
> an error has occurred with this particular DMA op.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  0 files changed
> 
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index 0174337..6524881 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -453,6 +453,20 @@ struct dmaengine_unmap_data {
>  };
>  
>  /**
> + * enum err_result_flags - result of DMA operations
> + * @ERR_DMA_NONE - no errors
> + * @ERR_DMA_READ - DMA read error
> + * @ERR_DMA_WRITE - DMA write error
> + * @ERR_DMA_ABORT - Operation aborted
> + */
> +enum err_result_flags {
> +	ERR_DMA_NONE = 0,
> +	ERR_DMA_READ,
> +	ERR_DMA_WRITE,
> +	ERR_DMA_ABORT,
> +};
> +
> +/**
>   * struct dma_async_tx_descriptor - async transaction descriptor
>   * ---dma generic offload fields---
>   * @cookie: tracking cookie for this transaction, set to -EBUSY if
> @@ -480,6 +494,7 @@ struct dma_async_tx_descriptor {
>  	dma_async_tx_callback callback;
>  	void *callback_param;
>  	struct dmaengine_unmap_data *unmap;
> +	enum err_result_flags result;
>  #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
>  	struct dma_async_tx_descriptor *next;
>  	struct dma_async_tx_descriptor *parent;

Okay, this needs more thought IMHO. The only issue with this approach is
that it violates one of the base fundamentals of dmaengine descriptor
submission.

Clients do not touch or use a descriptor after it has been submitted.
Looking at the results inside the descriptor is not right.

Perhaps we can add a query of results in the cookie, or add this to
tx_status; I am not sure.

FWIW, we definitely need some kind of error reporting.

Thanks
-- 
~Vinod


* RE: [PATCH v3 1/5] dmaengine: Adding error handling flag
  2016-07-08  5:29   ` Vinod Koul
@ 2016-07-08 16:00     ` Jiang, Dave
  0 siblings, 0 replies; 9+ messages in thread
From: Jiang, Dave @ 2016-07-08 16:00 UTC (permalink / raw)
  To: Koul, Vinod; +Cc: allen.hubbe, jdmason, dmaengine, linux-ntb, Williams, Dan J



> -----Original Message-----
> From: Koul, Vinod
> Sent: Thursday, July 07, 2016 10:29 PM
> To: Jiang, Dave <dave.jiang@intel.com>
> Cc: allen.hubbe@emc.com; jdmason@kudzu.us; dmaengine@vger.kernel.org; linux-ntb@googlegroups.com; Williams, Dan J
> <dan.j.williams@intel.com>
> Subject: Re: [PATCH v3 1/5] dmaengine: Adding error handling flag
> 
> On Tue, Jun 28, 2016 at 04:15:11PM -0700, Dave Jiang wrote:
> > Add an error flag to the callback descriptor to notify the upper layer that
> > an error has occurred with this particular DMA op.
> >
> > Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> > ---
> >  0 files changed
> >
> > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> > index 0174337..6524881 100644
> > --- a/include/linux/dmaengine.h
> > +++ b/include/linux/dmaengine.h
> > @@ -453,6 +453,20 @@ struct dmaengine_unmap_data {
> >  };
> >
> >  /**
> > + * enum err_result_flags - result of DMA operations
> > + * @ERR_DMA_NONE - no errors
> > + * @ERR_DMA_READ - DMA read error
> > + * @ERR_DMA_WRITE - DMA write error
> > + * @ERR_DMA_ABORT - Operation aborted
> > + */
> > +enum err_result_flags {
> > +	ERR_DMA_NONE = 0,
> > +	ERR_DMA_READ,
> > +	ERR_DMA_WRITE,
> > +	ERR_DMA_ABORT,
> > +};
> > +
> > +/**
> >   * struct dma_async_tx_descriptor - async transaction descriptor
> >   * ---dma generic offload fields---
> >   * @cookie: tracking cookie for this transaction, set to -EBUSY if
> > @@ -480,6 +494,7 @@ struct dma_async_tx_descriptor {
> >  	dma_async_tx_callback callback;
> >  	void *callback_param;
> >  	struct dmaengine_unmap_data *unmap;
> > +	enum err_result_flags result;
> >  #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
> >  	struct dma_async_tx_descriptor *next;
> >  	struct dma_async_tx_descriptor *parent;
> 
> Okay, this needs more thought IMHO. The only issue with this approach is
> that it violates one of the base fundamentals of dmaengine descriptor
> submission.
> 
> Clients do not touch or use a descriptor after it has been submitted.
> Looking at the results inside the descriptor is not right.
> 
> Perhaps we can add a query of results in the cookie, or add this to
> tx_status; I am not sure.
> 
> FWIW, we definitely need some kind of error reporting.

Ok, let me think some more about how to do this along the lines you
suggested. With XOR/PQ validate we provide a checksum_result field to check
the result; we could in general have all the DMA submission calls provide
something similar for error reporting.

> 
> Thanks
> --
> ~Vinod

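One possible shape for such out-of-band reporting, purely as a sketch (all
names below are hypothetical): deliver the result to the client as a
callback argument rather than storing it in the submitted descriptor, so
clients never have to read a descriptor after submission.

	/* Hypothetical: the result travels with the callback instead of
	 * living in the descriptor the client submitted.
	 */
	struct dma_op_result {
		enum err_result_flags result;
	};

	typedef void (*dma_async_tx_callback_result)(void *dma_async_param,
				const struct dma_op_result *result);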
