linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 4/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
  2017-07-01  2:40 ` [PATCH v6 1/6] g_NCR5380: Fix PDMA transfer size Finn Thain
  2017-07-01  2:40 ` [PATCH v6 3/6] g_NCR5380: Cleanup comments and whitespace Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01  2:40 ` [PATCH v6 5/6] g_NCR5380: Re-work PDMA loops Finn Thain
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

The word "read" may be used to mean "DMA read operation" or
"SCSI READ command", though a READ command implies writing to memory.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index dedaed2d16e4..33e1a480c903 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -48,8 +48,8 @@
 	int pdma_residual
 
 #define NCR5380_dma_xfer_len            generic_NCR5380_dma_xfer_len
-#define NCR5380_dma_recv_setup          generic_NCR5380_pread
-#define NCR5380_dma_send_setup          generic_NCR5380_pwrite
+#define NCR5380_dma_recv_setup          generic_NCR5380_precv
+#define NCR5380_dma_send_setup          generic_NCR5380_psend
 #define NCR5380_dma_residual            generic_NCR5380_dma_residual
 
 #define NCR5380_intr                    generic_NCR5380_intr
@@ -481,7 +481,7 @@ static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
 }
 
 /**
- * generic_NCR5380_pread - pseudo DMA read
+ * generic_NCR5380_precv - pseudo DMA receive
  * @hostdata: scsi host private data
  * @dst: buffer to write into
  * @len: transfer size
@@ -489,7 +489,7 @@ static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
  * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
+static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
                                         unsigned char *dst, int len)
 {
 	int blocks = len / 128;
@@ -557,7 +557,7 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 }
 
 /**
- * generic_NCR5380_pwrite - pseudo DMA write
+ * generic_NCR5380_psend - pseudo DMA send
  * @hostdata: scsi host private data
  * @src: buffer to read from
  * @len: transfer size
@@ -565,8 +565,8 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
  * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
-                                         unsigned char *src, int len)
+static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
+                                        unsigned char *src, int len)
 {
 	int blocks = len / 128;
 	int start = 0;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v6 3/6] g_NCR5380: Cleanup comments and whitespace
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
  2017-07-01  2:40 ` [PATCH v6 1/6] g_NCR5380: Fix PDMA transfer size Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01  2:40 ` [PATCH v6 4/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive Finn Thain
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 61 ++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 911a4300ea51..dedaed2d16e4 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -1,17 +1,17 @@
 /*
  * Generic Generic NCR5380 driver
- *	
+ *
  * Copyright 1993, Drew Eckhardt
- *	Visionary Computing
- *	(Unix and Linux consulting and custom programming)
- *	drew@colorado.edu
- *      +1 (303) 440-4894
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 440-4894
  *
  * NCR53C400 extensions (c) 1994,1995,1996, Kevin Lentin
- *    K.Lentin@cs.monash.edu.au
+ * K.Lentin@cs.monash.edu.au
  *
  * NCR53C400A extensions (c) 1996, Ingmar Baumgart
- *    ingmar@gonzo.schwaben.de
+ * ingmar@gonzo.schwaben.de
  *
  * DTC3181E extensions (c) 1997, Ronald van Cuijlenborg
  * ronald.van.cuijlenborg@tip.nl or nutty@dds.nl
@@ -481,15 +481,14 @@ static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
 }
 
 /**
- *	generic_NCR5380_pread - pseudo DMA read
- *	@hostdata: scsi host private data
- *	@dst: buffer to read into
- *	@len: buffer length
+ * generic_NCR5380_pread - pseudo DMA read
+ * @hostdata: scsi host private data
+ * @dst: buffer to write into
+ * @len: transfer size
  *
- *	Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- *	controller
+ * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
- 
+
 static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
                                         unsigned char *dst, int len)
 {
@@ -508,10 +507,10 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			insw(hostdata->io_port + hostdata->c400_host_buf,
-							dst + start, 64);
+			     dst + start, 64);
 		else if (hostdata->io_port)
 			insb(hostdata->io_port + hostdata->c400_host_buf,
-							dst + start, 128);
+			     dst + start, 128);
 		else
 			memcpy_fromio(dst + start,
 				hostdata->io + NCR53C400_host_buffer, 128);
@@ -558,13 +557,12 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 }
 
 /**
- *	generic_NCR5380_pwrite - pseudo DMA write
- *	@hostdata: scsi host private data
- *	@dst: buffer to read into
- *	@len: buffer length
+ * generic_NCR5380_pwrite - pseudo DMA write
+ * @hostdata: scsi host private data
+ * @src: buffer to read from
+ * @len: transfer size
  *
- *	Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- *	controller
+ * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
 static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
@@ -603,10 +601,10 @@ static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			outsw(hostdata->io_port + hostdata->c400_host_buf,
-							src + start, 64);
+			      src + start, 64);
 		else if (hostdata->io_port)
 			outsb(hostdata->io_port + hostdata->c400_host_buf,
-							src + start, 128);
+			      src + start, 128);
 		else
 			memcpy_toio(hostdata->io + NCR53C400_host_buffer,
 			            src + start, 128);
@@ -656,10 +654,8 @@ static int generic_NCR5380_dma_residual(struct NCR5380_hostdata *hostdata)
 	return hostdata->pdma_residual;
 }
 
-/*
- *	Include the NCR5380 core code that we build our driver around	
- */
- 
+/* Include the core driver code. */
+
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
@@ -679,11 +675,10 @@ static struct scsi_host_template driver_template = {
 	.max_sectors		= 128,
 };
 
-
 static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev)
 {
 	int ret = generic_NCR5380_init_one(&driver_template, pdev, base[ndev],
-					  irq[ndev], card[ndev]);
+	                                   irq[ndev], card[ndev]);
 	if (ret) {
 		if (base[ndev])
 			printk(KERN_WARNING "Card not found at address 0x%03x\n",
@@ -695,7 +690,7 @@ static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev)
 }
 
 static int generic_NCR5380_isa_remove(struct device *pdev,
-				   unsigned int ndev)
+                                      unsigned int ndev)
 {
 	generic_NCR5380_release_resources(dev_get_drvdata(pdev));
 	dev_set_drvdata(pdev, NULL);
@@ -718,7 +713,7 @@ static struct pnp_device_id generic_NCR5380_pnp_ids[] = {
 MODULE_DEVICE_TABLE(pnp, generic_NCR5380_pnp_ids);
 
 static int generic_NCR5380_pnp_probe(struct pnp_dev *pdev,
-			       const struct pnp_device_id *id)
+                                     const struct pnp_device_id *id)
 {
 	int base, irq;
 
@@ -729,7 +724,7 @@ static int generic_NCR5380_pnp_probe(struct pnp_dev *pdev,
 	irq = pnp_irq(pdev, 0);
 
 	return generic_NCR5380_init_one(&driver_template, &pdev->dev, base, irq,
-				       id->driver_data);
+	                                id->driver_data);
 }
 
 static void generic_NCR5380_pnp_remove(struct pnp_dev *pdev)
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v6 1/6] g_NCR5380: Fix PDMA transfer size
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01  2:40 ` [PATCH v6 3/6] g_NCR5380: Cleanup comments and whitespace Finn Thain
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

From: Ondrej Zary <linux@rainbow-software.org>

generic_NCR5380_dma_xfer_len() incorrectly uses cmd->transfersize
which causes rescan-scsi-bus and CD-ROM access to hang the system.
Use cmd->SCp.this_residual instead, like other NCR5380 drivers.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67c8dac321ad..14ef4e8c4713 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -76,6 +76,7 @@
 #define IRQ_AUTO 254
 
 #define MAX_CARDS 8
+#define DMA_MAX_SIZE 32768
 
 /* old-style parameters for compatibility */
 static int ncr_irq = -1;
@@ -629,23 +630,16 @@ static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
 static int generic_NCR5380_dma_xfer_len(struct NCR5380_hostdata *hostdata,
                                         struct scsi_cmnd *cmd)
 {
-	int transfersize = cmd->transfersize;
+	int transfersize = cmd->SCp.this_residual;
 
 	if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
 		return 0;
 
-	/* Limit transfers to 32K, for xx400 & xx406
-	 * pseudoDMA that transfers in 128 bytes blocks.
-	 */
-	if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
-	    !(cmd->SCp.this_residual % transfersize))
-		transfersize = 32 * 1024;
-
 	/* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
 	if (transfersize % 128)
 		transfersize = 0;
 
-	return transfersize;
+	return min(transfersize, DMA_MAX_SIZE);
 }
 
 /*
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup
@ 2017-07-01  2:40 Finn Thain
  2017-07-01  2:40 ` [PATCH v6 1/6] g_NCR5380: Fix PDMA transfer size Finn Thain
                   ` (6 more replies)
  0 siblings, 7 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

Ondrej, would you please test this new series?

Changed since v1:
- PDMA transfer residual is calculated earlier.
- End of DMA flag check is now polled (if there is any residual).

Changed since v2:
- Bail out of transfer loops when Gated IRQ gets asserted.
- Make udelay conditional on board type.
- Drop sg_tablesize patch due to performance regression.

Changed since v3:
- Add Ondrej's workaround for corrupt WRITE commands on DTC boards.
- Reset the 53c400 logic after any short PDMA transfer.
- Don't fail the transfer if the 53c400 logic got a reset.

Changed since v4:
- Bail out of transfer loops when Gated IRQ gets asserted. (Again.)
- Always call wait_for_53c80_registers() at end of transfer.
- Drain chip buffers after PDMA receive is interrupted.
- Rework residual calculation.
- Add new patch to correct DMA terminology.

Changed since v5:
- Rework residual calculation to account for on-chip buffer swap.
- Attempt to retain the disconnect/IRQ detection in the DTC436 workaround.
- Move all DTC436 workarounds to final patch.


Finn Thain (2):
  g_NCR5380: Cleanup comments and whitespace
  g_NCR5380: Use unambiguous terminology for PDMA send and receive

Ondrej Zary (4):
  g_NCR5380: Fix PDMA transfer size
  g_NCR5380: End PDMA transfer correctly on target disconnection
  g_NCR5380: Re-work PDMA loops
  g_NCR5380: Various DTC436 workarounds

 drivers/scsi/g_NCR5380.c | 273 ++++++++++++++++++++++++++---------------------
 1 file changed, 151 insertions(+), 122 deletions(-)

-- 
2.13.0

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v6 5/6] g_NCR5380: Re-work PDMA loops
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
                   ` (2 preceding siblings ...)
  2017-07-01  2:40 ` [PATCH v6 4/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01  2:40 ` [PATCH v6 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection Finn Thain
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

From: Ondrej Zary <linux@rainbow-software.org>

The polling loops in pread() and pwrite() can easily become infinite
loops and hang the machine.

Merge the IRQ check into host buffer wait loop and add polling limit.

Also place a limit on polling for 53C80 registers accessibility.

[Use NCR5380_poll_politely2() for register polling. Rely on polling for
gated IRQ rather than polling for phase error, like the algorithm in the
53c400 datasheet. Move DTC436 workarounds into a separate patch.
Factor-out common code as wait_for_53c80_access(). Rework the residual
correction. -- F.T.]

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 173 +++++++++++++++++++++++++----------------------
 1 file changed, 92 insertions(+), 81 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 33e1a480c903..137ec58c43ac 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -480,6 +480,28 @@ static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
 		release_mem_region(base, region_size);
 }
 
+/* wait_for_53c80_access - wait for 53C80 registers to become accessible
+ * @hostdata: scsi host private data
+ *
+ * The registers within the 53C80 logic block are inaccessible until
+ * bit 7 in the 53C400 control status register gets asserted.
+ */
+
+static void wait_for_53c80_access(struct NCR5380_hostdata *hostdata)
+{
+	int count = 10000;
+
+	do {
+		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)
+			return;
+	} while (--count > 0);
+
+	scmd_printk(KERN_ERR, hostdata->connected,
+	            "53c80 registers not accessible, device will be reset\n");
+	NCR5380_write(hostdata->c400_ctl_status, CSR_RESET);
+	NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+}
+
 /**
  * generic_NCR5380_precv - pseudo DMA receive
  * @hostdata: scsi host private data
@@ -492,18 +514,23 @@ static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
 static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
                                         unsigned char *dst, int len)
 {
-	int blocks = len / 128;
+	int residual;
 	int start = 0;
 
 	NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
-	NCR5380_write(hostdata->c400_blk_cnt, blocks);
-	while (1) {
-		if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
+	NCR5380_write(hostdata->c400_blk_cnt, len / 128);
+
+	do {
+		if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
+		                           CSR_HOST_BUF_NOT_RDY, 0,
+		                           hostdata->c400_ctl_status,
+		                           CSR_GATED_53C80_IRQ,
+		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
+			break;
+
+		if (NCR5380_read(hostdata->c400_ctl_status) &
+		    CSR_HOST_BUF_NOT_RDY)
 			break;
-		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ)
-			goto out_wait;
-		while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
-			; /* FIXME - no timeout */
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			insw(hostdata->io_port + hostdata->c400_host_buf,
@@ -514,44 +541,26 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 		else
 			memcpy_fromio(dst + start,
 				hostdata->io + NCR53C400_host_buffer, 128);
-
 		start += 128;
-		blocks--;
-	}
-
-	if (blocks) {
-		while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
-			; /* FIXME - no timeout */
+	} while (start < len);
 
-		if (hostdata->io_port && hostdata->io_width == 2)
-			insw(hostdata->io_port + hostdata->c400_host_buf,
-							dst + start, 64);
-		else if (hostdata->io_port)
-			insb(hostdata->io_port + hostdata->c400_host_buf,
-							dst + start, 128);
-		else
-			memcpy_fromio(dst + start,
-				hostdata->io + NCR53C400_host_buffer, 128);
+	residual = len - start;
 
-		start += 128;
-		blocks--;
+	if (residual != 0) {
+		/* 53c80 interrupt or transfer timeout. Reset 53c400 logic. */
+		NCR5380_write(hostdata->c400_ctl_status, CSR_RESET);
+		NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
 	}
+	wait_for_53c80_access(hostdata);
 
-	if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
-		printk("53C400r: no 53C80 gated irq after transfer");
-
-out_wait:
-	hostdata->pdma_residual = len - start;
-
-	/* wait for 53C80 registers to be available */
-	while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
-		;
-
-	if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
-	                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
-	                          HZ / 64) < 0)
+	if (residual == 0 && NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+	                                           BASR_END_DMA_TRANSFER,
+	                                           BASR_END_DMA_TRANSFER,
+	                                           HZ / 64) < 0)
 		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout (%d)\n",
-		            __func__, hostdata->pdma_residual);
+		            __func__, residual);
+
+	hostdata->pdma_residual = residual;
 
 	return 0;
 }
@@ -568,36 +577,33 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
                                         unsigned char *src, int len)
 {
-	int blocks = len / 128;
+	int residual;
 	int start = 0;
 
 	NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
-	NCR5380_write(hostdata->c400_blk_cnt, blocks);
-	while (1) {
-		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ)
-			goto out_wait;
-
-		if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
+	NCR5380_write(hostdata->c400_blk_cnt, len / 128);
+
+	do {
+		if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
+		                           CSR_HOST_BUF_NOT_RDY, 0,
+		                           hostdata->c400_ctl_status,
+		                           CSR_GATED_53C80_IRQ,
+		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
 			break;
-		while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
-			; // FIXME - timeout
 
-		if (hostdata->io_port && hostdata->io_width == 2)
-			outsw(hostdata->io_port + hostdata->c400_host_buf,
-							src + start, 64);
-		else if (hostdata->io_port)
-			outsb(hostdata->io_port + hostdata->c400_host_buf,
-							src + start, 128);
-		else
-			memcpy_toio(hostdata->io + NCR53C400_host_buffer,
-			            src + start, 128);
+		if (NCR5380_read(hostdata->c400_ctl_status) &
+		    CSR_HOST_BUF_NOT_RDY) {
+			/* The chip has done a 128 B buffer swap but the first
+			 * buffer still has not reached the SCSI bus.
+			 */
+			if (start > 0)
+				start -= 128;
+			break;
+		}
 
-		start += 128;
-		blocks--;
-	}
-	if (blocks) {
-		while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
-			; // FIXME - no timeout
+		if (NCR5380_read(hostdata->c400_ctl_status) &
+		    CSR_GATED_53C80_IRQ)
+			break;
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			outsw(hostdata->io_port + hostdata->c400_host_buf,
@@ -608,28 +614,33 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 		else
 			memcpy_toio(hostdata->io + NCR53C400_host_buffer,
 			            src + start, 128);
-
 		start += 128;
-		blocks--;
-	}
+	} while (start < len);
 
-out_wait:
-	hostdata->pdma_residual = len - start;
+	residual = len - start;
 
-	/* wait for 53C80 registers to be available */
-	while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
-		udelay(4); /* DTC436 chip hangs without this */
-		/* FIXME - no timeout */
+	if (residual != 0) {
+		/* 53c80 interrupt or transfer timeout. Reset 53c400 logic. */
+		NCR5380_write(hostdata->c400_ctl_status, CSR_RESET);
+		NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+	}
+	wait_for_53c80_access(hostdata);
+
+	if (residual == 0) {
+		if (NCR5380_poll_politely(hostdata, TARGET_COMMAND_REG,
+		                          TCR_LAST_BYTE_SENT, TCR_LAST_BYTE_SENT,
+		                          HZ / 64) < 0)
+			scmd_printk(KERN_ERR, hostdata->connected,
+			            "%s: Last Byte Sent timeout\n", __func__);
+
+		if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+		                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+		                          HZ / 64) < 0)
+			scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout (%d)\n",
+			            __func__, residual);
 	}
 
-	while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
-		; 	// TIMEOUT
-
-	if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
-	                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
-	                          HZ / 64) < 0)
-		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout (%d)\n",
-		            __func__, hostdata->pdma_residual);
+	hostdata->pdma_residual = residual;
 
 	return 0;
 }
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v6 6/6] g_NCR5380: Various DTC436 workarounds
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
                   ` (4 preceding siblings ...)
  2017-07-01  2:40 ` [PATCH v6 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01 21:49 ` [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Ondrej Zary
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

From: Ondrej Zary <linux@rainbow-software.org>

Limit PDMA send to 512 B to avoid data corruption on DTC3181E. The
corruption is always the same: one byte missing at the beginning of
a 128 B block. It happens only with slow Quantum LPS 240 drive, not with
faster IBM DORS-32160. It's not clear what causes this. Documentation
for the DTC436 chip has not been made available.

On DTC chips, Gated IRQ (for End of DMA) arrives early, and needs
special handling.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 137ec58c43ac..49312bf98068 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -45,7 +45,8 @@
 	int c400_blk_cnt; \
 	int c400_host_buf; \
 	int io_width; \
-	int pdma_residual
+	int pdma_residual; \
+	int board
 
 #define NCR5380_dma_xfer_len            generic_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup          generic_NCR5380_precv
@@ -316,6 +317,7 @@ static int generic_NCR5380_init_one(struct scsi_host_template *tpnt,
 	}
 	hostdata = shost_priv(instance);
 
+	hostdata->board = board;
 	hostdata->io = iomem;
 	hostdata->region_size = region_size;
 
@@ -492,6 +494,8 @@ static void wait_for_53c80_access(struct NCR5380_hostdata *hostdata)
 	int count = 10000;
 
 	do {
+		if (hostdata->board == BOARD_DTC3181E)
+			udelay(4); /* DTC436 chip hangs without this */
 		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)
 			return;
 	} while (--count > 0);
@@ -521,16 +525,22 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 	NCR5380_write(hostdata->c400_blk_cnt, len / 128);
 
 	do {
-		if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
-		                           CSR_HOST_BUF_NOT_RDY, 0,
-		                           hostdata->c400_ctl_status,
-		                           CSR_GATED_53C80_IRQ,
-		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
-			break;
-
-		if (NCR5380_read(hostdata->c400_ctl_status) &
-		    CSR_HOST_BUF_NOT_RDY)
-			break;
+		if (hostdata->board == BOARD_DTC3181E && start == len - 128) {
+			/* Ignore early CSR_GATED_53C80_IRQ */
+			if (NCR5380_poll_politely(hostdata, hostdata->c400_ctl_status,
+			                          CSR_HOST_BUF_NOT_RDY, 0, HZ / 64) < 0)
+				break;
+		} else {
+			if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
+			                           CSR_HOST_BUF_NOT_RDY, 0,
+			                           hostdata->c400_ctl_status,
+			                           CSR_GATED_53C80_IRQ,
+			                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
+				break;
+			if (NCR5380_read(hostdata->c400_ctl_status) &
+			    CSR_HOST_BUF_NOT_RDY)
+				break;
+		}
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			insw(hostdata->io_port + hostdata->c400_host_buf,
@@ -655,7 +665,12 @@ static int generic_NCR5380_dma_xfer_len(struct NCR5380_hostdata *hostdata,
 
 	/* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
 	if (transfersize % 128)
-		transfersize = 0;
+		return 0;
+
+	/* Limit PDMA send to 512 B to avoid random corruption on DTC3181E */
+	if (hostdata->board == BOARD_DTC3181E &&
+	    cmd->sc_data_direction == DMA_TO_DEVICE)
+		transfersize = min(cmd->SCp.this_residual, 512);
 
 	return min(transfersize, DMA_MAX_SIZE);
 }
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v6 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
                   ` (3 preceding siblings ...)
  2017-07-01  2:40 ` [PATCH v6 5/6] g_NCR5380: Re-work PDMA loops Finn Thain
@ 2017-07-01  2:40 ` Finn Thain
  2017-07-01  2:40 ` [PATCH v6 6/6] g_NCR5380: Various DTC436 workarounds Finn Thain
  2017-07-01 21:49 ` [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Ondrej Zary
  6 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-01  2:40 UTC (permalink / raw)
  To: James E.J. Bottomley, Martin K. Petersen, Ondrej Zary
  Cc: linux-scsi, linux-kernel, Michael Schmitz

From: Ondrej Zary <linux@rainbow-software.org>

When an IRQ arrives during PDMA transfer, pread() and pwrite() return
without waiting for the 53C80 registers to be ready and this ends up
messing up the chip state. This was observed with SONY CDU-55S which is
slow enough to disconnect during 4096-byte reads.

IRQ during PDMA is not an error so don't return -1. Instead, store the
remaining byte count for use by NCR5380_dma_residual().

[Poll for the BASR_END_DMA_TRANSFER condition rather than remove the
error message -- F.T.]

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
---
 drivers/scsi/g_NCR5380.c | 48 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 14ef4e8c4713..911a4300ea51 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -44,12 +44,13 @@
 	int c400_ctl_status; \
 	int c400_blk_cnt; \
 	int c400_host_buf; \
-	int io_width
+	int io_width; \
+	int pdma_residual
 
 #define NCR5380_dma_xfer_len            generic_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup          generic_NCR5380_pread
 #define NCR5380_dma_send_setup          generic_NCR5380_pwrite
-#define NCR5380_dma_residual            NCR5380_dma_residual_none
+#define NCR5380_dma_residual            generic_NCR5380_dma_residual
 
 #define NCR5380_intr                    generic_NCR5380_intr
 #define NCR5380_queue_command           generic_NCR5380_queue_command
@@ -500,10 +501,8 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 	while (1) {
 		if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
 			break;
-		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
-			printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
-			return -1;
-		}
+		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ)
+			goto out_wait;
 		while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
 			; /* FIXME - no timeout */
 
@@ -542,13 +541,19 @@ static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 	if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
 		printk("53C400r: no 53C80 gated irq after transfer");
 
+out_wait:
+	hostdata->pdma_residual = len - start;
+
 	/* wait for 53C80 registers to be available */
 	while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
 		;
 
-	if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
-		printk(KERN_ERR "53C400r: no end dma signal\n");
-		
+	if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+	                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+	                          HZ / 64) < 0)
+		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout (%d)\n",
+		            __func__, hostdata->pdma_residual);
+
 	return 0;
 }
 
@@ -571,10 +576,8 @@ static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
 	NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
 	NCR5380_write(hostdata->c400_blk_cnt, blocks);
 	while (1) {
-		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
-			printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
-			return -1;
-		}
+		if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ)
+			goto out_wait;
 
 		if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
 			break;
@@ -612,18 +615,24 @@ static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
 		blocks--;
 	}
 
+out_wait:
+	hostdata->pdma_residual = len - start;
+
 	/* wait for 53C80 registers to be available */
 	while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
 		udelay(4); /* DTC436 chip hangs without this */
 		/* FIXME - no timeout */
 	}
 
-	if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
-		printk(KERN_ERR "53C400w: no end dma signal\n");
-	}
-
 	while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
 		; 	// TIMEOUT
+
+	if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+	                          BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+	                          HZ / 64) < 0)
+		scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA timeout (%d)\n",
+		            __func__, hostdata->pdma_residual);
+
 	return 0;
 }
 
@@ -642,6 +651,11 @@ static int generic_NCR5380_dma_xfer_len(struct NCR5380_hostdata *hostdata,
 	return min(transfersize, DMA_MAX_SIZE);
 }
 
+static int generic_NCR5380_dma_residual(struct NCR5380_hostdata *hostdata)
+{
+	return hostdata->pdma_residual;
+}
+
 /*
  *	Include the NCR5380 core code that we build our driver around	
  */
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup
  2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
                   ` (5 preceding siblings ...)
  2017-07-01  2:40 ` [PATCH v6 6/6] g_NCR5380: Various DTC436 workarounds Finn Thain
@ 2017-07-01 21:49 ` Ondrej Zary
  2017-07-02  3:11   ` Finn Thain
  6 siblings, 1 reply; 11+ messages in thread
From: Ondrej Zary @ 2017-07-01 21:49 UTC (permalink / raw)
  To: Finn Thain
  Cc: James E.J. Bottomley, Martin K. Petersen, linux-scsi,
	linux-kernel, Michael Schmitz

On Saturday 01 July 2017 04:40:53 Finn Thain wrote:
> Ondrej, would you please test this new series?
>
> Changed since v1:
> - PDMA transfer residual is calculated earlier.
> - End of DMA flag check is now polled (if there is any residual).
>
> Changed since v2:
> - Bail out of transfer loops when Gated IRQ gets asserted.
> - Make udelay conditional on board type.
> - Drop sg_tablesize patch due to performance regression.
>
> Changed since v3:
> - Add Ondrej's workaround for corrupt WRITE commands on DTC boards.
> - Reset the 53c400 logic after any short PDMA transfer.
> - Don't fail the transfer if the 53c400 logic got a reset.
>
> Changed since v4:
> - Bail out of transfer loops when Gated IRQ gets asserted. (Again.)
> - Always call wait_for_53c80_registers() at end of transfer.
> - Drain chip buffers after PDMA receive is interrupted.
> - Rework residual calculation.
> - Add new patch to correct DMA terminology.
>
> Changed since v5:
> - Rework residual calculation to account for on-chip buffer swap.
> - Attempt to retain the disconnect/IRQ detection in the DTC436 workaround.
> - Move all DTC436 workarounds to final patch.
>
>
> Finn Thain (2):
>   g_NCR5380: Cleanup comments and whitespace
>   g_NCR5380: Use unambiguous terminology for PDMA send and receive
>
> Ondrej Zary (4):
>   g_NCR5380: Fix PDMA transfer size
>   g_NCR5380: End PDMA transfer correctly on target disconnection
>   g_NCR5380: Re-work PDMA loops
>   g_NCR5380: Various DTC436 workarounds
>
>  drivers/scsi/g_NCR5380.c | 273
> ++++++++++++++++++++++++++--------------------- 1 file changed, 151
> insertions(+), 122 deletions(-)

The write corruption is still present - "start" must be rolled back in both
IRQ and timeout cases. And 128 B is not enough , 256 is OK (why did it work
last time?). We just wrote a buffer to the chip but the chip is writing the
previous one to the drive - so if a problem arises, both buffers are lost.

This fixes the corruption (although the "start > 0" check seems wrong now):
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -598,23 +598,17 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 		                           CSR_HOST_BUF_NOT_RDY, 0,
 		                           hostdata->c400_ctl_status,
 		                           CSR_GATED_53C80_IRQ,
-		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
-                       break;
-
-		if (NCR5380_read(hostdata->c400_ctl_status) &
-		    CSR_HOST_BUF_NOT_RDY) {
+		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+		    (NCR5380_read(hostdata->c400_ctl_status) &
+		     (CSR_HOST_BUF_NOT_RDY | CSR_GATED_53C80_IRQ))) {
 			/* The chip has done a 128 B buffer swap but the first
 			 * buffer still has not reached the SCSI bus.
 			 */
 			if (start > 0)
-				start -= 128;
+				start -= 256;
 			break;
 		}
 
-		if (NCR5380_read(hostdata->c400_ctl_status) &
-		    CSR_GATED_53C80_IRQ)
-			break;
-
 		if (hostdata->io_port && hostdata->io_width == 2)
 			outsw(hostdata->io_port + hostdata->c400_host_buf,
 			      src + start, 64);


DTC seems to work too.

-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup
  2017-07-01 21:49 ` [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Ondrej Zary
@ 2017-07-02  3:11   ` Finn Thain
  2017-07-02 14:51     ` Ondrej Zary
  0 siblings, 1 reply; 11+ messages in thread
From: Finn Thain @ 2017-07-02  3:11 UTC (permalink / raw)
  To: Ondrej Zary
  Cc: James E.J. Bottomley, Martin K. Petersen, linux-scsi,
	linux-kernel, Michael Schmitz

On Sat, 1 Jul 2017, Ondrej Zary wrote:

> 
> The write corruption is still present - "start" must be rolled back in 
> both IRQ and timeout cases.

Your original algorithm aborts the transfer for a timeout. Same with mine.

The bug must be a elsewhere. 

> And 128 B is not enough , 256 is OK (why did it work last time?).

When I get contradictory results it usually means I booted the wrong build 
or built the wrong branch.

Actually, I think that adding 128 to the residual is correct in some 
sitations, and 256 is correct in other situations.

> We just wrote a buffer to the chip but the chip is writing the previous 
> one to the drive - so if a problem arises, both buffers are lost.
> 

I see. I guess we have to take buffer swaps into account.

> This fixes the corruption (although the "start > 0" check seems wrong now):
> --- a/drivers/scsi/g_NCR5380.c
> +++ b/drivers/scsi/g_NCR5380.c
> @@ -598,23 +598,17 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
>  		                           CSR_HOST_BUF_NOT_RDY, 0,
>  		                           hostdata->c400_ctl_status,
>  		                           CSR_GATED_53C80_IRQ,
> -		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
> -                       break;
> -
> -		if (NCR5380_read(hostdata->c400_ctl_status) &
> -		    CSR_HOST_BUF_NOT_RDY) {
> +		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
> +		    (NCR5380_read(hostdata->c400_ctl_status) &
> +		     (CSR_HOST_BUF_NOT_RDY | CSR_GATED_53C80_IRQ))) {

You could add a printk to the timeout branch. If it executes, something is 
seriously wrong. E.g.

-	break;
+	{ pr_err("send timeout %02x, %d/%d\n", NCR5380_read(hostdata->c400_ctl_status), start, len); break; }

>  			/* The chip has done a 128 B buffer swap but the first
>  			 * buffer still has not reached the SCSI bus.
>  			 */
>  			if (start > 0)
> -				start -= 128;
> +				start -= 256;
>  			break;
>  		}

BTW, that change carries the risk of 'start' going negative and the 
residual exceeding the length of the original transfer.

But I agree with you that there's a problem with the residual.

If I understand correctly, the 53c400 can't do a buffer swap until the 
disk acknowledges each of the 128 bytes from the buffer. But I guess the 
first buffer is special because the disk will not see the first byte of 
the transfer until after the first buffer swap.

And it appears that the last buffer is also special: we have to wait for 
CSR_HOST_BUF_NOT_RDY even after start == len otherwise we may not detect a 
failure and fix the residual. So I think the datasheet is right; we have 
to iterate until the block counter goes to zero.

I think it is safe to say that when CSR_HOST_BUF_NOT_RDY, 'start' is 
between 128 and 256 B ahead of the disk. Otherwise, the host buffer is 
empty and 'start' is no more than 128 B ahead of the disk.

>  
> -		if (NCR5380_read(hostdata->c400_ctl_status) &
> -		    CSR_GATED_53C80_IRQ)
> -			break;
> -
>  		if (hostdata->io_port && hostdata->io_width == 2)
>  			outsw(hostdata->io_port + hostdata->c400_host_buf,
>  			      src + start, 64);
> 
> 
> DTC seems to work too.
> 

OK. Thanks for testing. Please try the patch below on top of v6.

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 49312bf98068..74bbfaf0f397 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -525,8 +525,8 @@ static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 	NCR5380_write(hostdata->c400_blk_cnt, len / 128);
 
 	do {
-		if (hostdata->board == BOARD_DTC3181E && start == len - 128) {
-			/* Ignore early CSR_GATED_53C80_IRQ */
+		if (start == len - 128) {
+			/* Ignore End of DMA interrupt for the last buffer */
 			if (NCR5380_poll_politely(hostdata, hostdata->c400_ctl_status,
 			                          CSR_HOST_BUF_NOT_RDY, 0, HZ / 64) < 0)
 				break;
@@ -603,17 +603,27 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 
 		if (NCR5380_read(hostdata->c400_ctl_status) &
 		    CSR_HOST_BUF_NOT_RDY) {
-			/* The chip has done a 128 B buffer swap but the first
-			 * buffer still has not reached the SCSI bus.
-			 */
-			if (start > 0)
+			/* Both 128 B buffers are in use */
+			if (start >= 128)
+				start -= 128;
+			if (start >= 128)
 				start -= 128;
 			break;
 		}
 
+		if (start >= len && NCR5380_read(hostdata->c400_blk_cnt) == 0)
+			break;
+
 		if (NCR5380_read(hostdata->c400_ctl_status) &
-		    CSR_GATED_53C80_IRQ)
+		    CSR_GATED_53C80_IRQ) {
+			/* Host buffer is empty, other one is in use */
+			if (start >= 128)
+				start -= 128;
 			break;
+		}
+
+		if (start >= len)
+			continue;
 
 		if (hostdata->io_port && hostdata->io_width == 2)
 			outsw(hostdata->io_port + hostdata->c400_host_buf,
@@ -625,7 +635,7 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 			memcpy_toio(hostdata->io + NCR53C400_host_buffer,
 			            src + start, 128);
 		start += 128;
-	} while (start < len);
+	} while (1);
 
 	residual = len - start;
 

-- 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup
  2017-07-02  3:11   ` Finn Thain
@ 2017-07-02 14:51     ` Ondrej Zary
  2017-07-03  8:01       ` Finn Thain
  0 siblings, 1 reply; 11+ messages in thread
From: Ondrej Zary @ 2017-07-02 14:51 UTC (permalink / raw)
  To: Finn Thain
  Cc: James E.J. Bottomley, Martin K. Petersen, linux-scsi,
	linux-kernel, Michael Schmitz

On Sunday 02 July 2017 05:11:27 Finn Thain wrote:
> On Sat, 1 Jul 2017, Ondrej Zary wrote:
> > The write corruption is still present - "start" must be rolled back in
> > both IRQ and timeout cases.
>
> Your original algorithm aborts the transfer for a timeout. Same with mine.

I do "start -= 2 * 128" even after timeout.

> The bug must be a elsewhere.
>
> > And 128 B is not enough , 256 is OK (why did it work last time?).
>
> When I get contradictory results it usually means I booted the wrong build
> or built the wrong branch.

I've just retested PATCHv5, it really misses 128 bytes and works if I
add "residual += 128;".

> Actually, I think that adding 128 to the residual is correct in some
> sitations, and 256 is correct in other situations.
>
> > We just wrote a buffer to the chip but the chip is writing the previous
> > one to the drive - so if a problem arises, both buffers are lost.
>
> I see. I guess we have to take buffer swaps into account.
>
> > This fixes the corruption (although the "start > 0" check seems wrong
> > now): --- a/drivers/scsi/g_NCR5380.c
> > +++ b/drivers/scsi/g_NCR5380.c
> > @@ -598,23 +598,17 @@ static inline int generic_NCR5380_psend(struct
> > NCR5380_hostdata *hostdata, CSR_HOST_BUF_NOT_RDY, 0,
> >  		                           hostdata->c400_ctl_status,
> >  		                           CSR_GATED_53C80_IRQ,
> > -		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
> > -                       break;
> > -
> > -		if (NCR5380_read(hostdata->c400_ctl_status) &
> > -		    CSR_HOST_BUF_NOT_RDY) {
> > +		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
> > +		    (NCR5380_read(hostdata->c400_ctl_status) &
> > +		     (CSR_HOST_BUF_NOT_RDY | CSR_GATED_53C80_IRQ))) {
>
> You could add a printk to the timeout branch. If it executes, something is
> seriously wrong. E.g.
>
> -	break;
> +	{ pr_err("send timeout %02x, %d/%d\n",
> NCR5380_read(hostdata->c400_ctl_status), start, len); break; }

Yes, timeouts do happen:
[ 9671.909223] send timeout 14, 3840/4096
[ 9672.978079] send timeout 14, 2816/4096
[ 9675.323751] send timeout 14, 1280/4096

> >  			/* The chip has done a 128 B buffer swap but the first
> >  			 * buffer still has not reached the SCSI bus.
> >  			 */
> >  			if (start > 0)
> > -				start -= 128;
> > +				start -= 256;
> >  			break;
> >  		}
>
> BTW, that change carries the risk of 'start' going negative and the
> residual exceeding the length of the original transfer.
>
> But I agree with you that there's a problem with the residual.
>
> If I understand correctly, the 53c400 can't do a buffer swap until the
> disk acknowledges each of the 128 bytes from the buffer. But I guess the
> first buffer is special because the disk will not see the first byte of
> the transfer until after the first buffer swap.
>
> And it appears that the last buffer is also special: we have to wait for
> CSR_HOST_BUF_NOT_RDY even after start == len otherwise we may not detect a
> failure and fix the residual. So I think the datasheet is right; we have
> to iterate until the block counter goes to zero.
>
> I think it is safe to say that when CSR_HOST_BUF_NOT_RDY, 'start' is
> between 128 and 256 B ahead of the disk. Otherwise, the host buffer is
> empty and 'start' is no more than 128 B ahead of the disk.
>
> > -		if (NCR5380_read(hostdata->c400_ctl_status) &
> > -		    CSR_GATED_53C80_IRQ)
> > -			break;
> > -
> >  		if (hostdata->io_port && hostdata->io_width == 2)
> >  			outsw(hostdata->io_port + hostdata->c400_host_buf,
> >  			      src + start, 64);
> >
> >
> > DTC seems to work too.
>
> OK. Thanks for testing. Please try the patch below on top of v6.

It misses 256B blocks. It's caused by the timeouts, this patch fixes it:

--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -598,11 +598,9 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
 		                           CSR_HOST_BUF_NOT_RDY, 0,
 		                           hostdata->c400_ctl_status,
 		                           CSR_GATED_53C80_IRQ,
-		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
-			break;
-
-		if (NCR5380_read(hostdata->c400_ctl_status) &
-		    CSR_HOST_BUF_NOT_RDY) {
+		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
+		    (NCR5380_read(hostdata->c400_ctl_status) &
+		     CSR_HOST_BUF_NOT_RDY)) {
 			/* Both 128 B buffers are in use */
 			if (start >= 128)
 				start -= 128;


-- 
Ondrej Zary

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup
  2017-07-02 14:51     ` Ondrej Zary
@ 2017-07-03  8:01       ` Finn Thain
  0 siblings, 0 replies; 11+ messages in thread
From: Finn Thain @ 2017-07-03  8:01 UTC (permalink / raw)
  To: Ondrej Zary
  Cc: James E.J. Bottomley, Martin K. Petersen, linux-scsi,
	linux-kernel, Michael Schmitz

On Sun, 2 Jul 2017, Ondrej Zary wrote:

> On Sunday 02 July 2017 05:11:27 Finn Thain wrote:
> > On Sat, 1 Jul 2017, Ondrej Zary wrote:
> > > The write corruption is still present - "start" must be rolled back 
> > > in both IRQ and timeout cases.
> >
> > Your original algorithm aborts the transfer for a timeout. Same with 
> > mine.
> 
> I do "start -= 2 * 128" even after timeout.
> 

Right. My mistake.

> > The bug must be a elsewhere.
> >
> > > And 128 B is not enough , 256 is OK (why did it work last time?).
> >
> > When I get contradictory results it usually means I booted the wrong 
> > build or built the wrong branch.
> 
> I've just retested PATCHv5, it really misses 128 bytes and works if I 
> add "residual += 128;".
> 
> > Actually, I think that adding 128 to the residual is correct in some 
> > sitations, and 256 is correct in other situations.
> >
> > > We just wrote a buffer to the chip but the chip is writing the 
> > > previous one to the drive - so if a problem arises, both buffers are 
> > > lost.
> >
> > I see. I guess we have to take buffer swaps into account.
> >
> > > This fixes the corruption (although the "start > 0" check seems wrong
> > > now):
> > > --- a/drivers/scsi/g_NCR5380.c
> > > +++ b/drivers/scsi/g_NCR5380.c
> > > @@ -598,23 +598,17 @@ static inline int generic_NCR5380_psend(struct
> > > NCR5380_hostdata *hostdata, CSR_HOST_BUF_NOT_RDY, 0,
> > >  		                           hostdata->c400_ctl_status,
> > >  		                           CSR_GATED_53C80_IRQ,
> > > -		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
> > > -                       break;
> > > -
> > > -		if (NCR5380_read(hostdata->c400_ctl_status) &
> > > -		    CSR_HOST_BUF_NOT_RDY) {
> > > +		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
> > > +		    (NCR5380_read(hostdata->c400_ctl_status) &
> > > +		     (CSR_HOST_BUF_NOT_RDY | CSR_GATED_53C80_IRQ))) {
> >
> > You could add a printk to the timeout branch. If it executes, 
> > something is seriously wrong. E.g.
> >
> > -	break;
> > +	{ pr_err("send timeout %02x, %d/%d\n",
> > NCR5380_read(hostdata->c400_ctl_status), start, len); break; }
> 
> Yes, timeouts do happen:
> [ 9671.909223] send timeout 14, 3840/4096
> [ 9672.978079] send timeout 14, 2816/4096
> [ 9675.323751] send timeout 14, 1280/4096
> 

Interesting that these occur unpredictably at 1.07 and 2.34 seconds apart 
(I'm assuming that these were logged during continuous write activity.)

The 0x14 means CSR_53C80_INTR | CSR_HOST_BUF_NOT_RDY which is normal.

The 'start' values are multiples of 256, not 128, which is curious.

This problem looks like the DTC436 PDMA send problem that required the 512 
byte transfer limit.

Can you make the problem go away by setting sg_tablesize = SG_NONE?

Does this happen with some boards or targets and not others?

The workaround for the DTC436 problem didn't seem to harm performance 
(timeouts will) so we might use the same workaround here. But I am 
concerned that we actually caused this problem by over-estimating the 
residual somehow.

If the residual was too large, the driver would try to send too much data, 
and the scsi target would never drain the scsi buffer, which would lead to 
a CSR_HOST_BUF_NOT_RDY timeout. This may be detectable as data corruption, 
but not necessarily.

Is it possible that a generic_NCR5380_psend() call which terminates due to 
CSR_HOST_BUF_NOT_RDY always follows a generic_NCR5380_psend() call which 
terminated due to CSR_GATED_53C80_IRQ?

Other than that, I've no explanation for the timeout, except maybe device 
erratum.

> > >  			/* The chip has done a 128 B buffer swap but the first
> > >  			 * buffer still has not reached the SCSI bus.
> > >  			 */
> > >  			if (start > 0)
> > > -				start -= 128;
> > > +				start -= 256;
> > >  			break;
> > >  		}
> >
> > BTW, that change carries the risk of 'start' going negative and the 
> > residual exceeding the length of the original transfer.
> >
> > But I agree with you that there's a problem with the residual.
> >
> > If I understand correctly, the 53c400 can't do a buffer swap until the 
> > disk acknowledges each of the 128 bytes from the buffer. But I guess 
> > the first buffer is special because the disk will not see the first 
> > byte of the transfer until after the first buffer swap.
> >
> > And it appears that the last buffer is also special: we have to wait 
> > for CSR_HOST_BUF_NOT_RDY even after start == len otherwise we may not 
> > detect a failure and fix the residual. So I think the datasheet is 
> > right; we have to iterate until the block counter goes to zero.
> >
> > I think it is safe to say that when CSR_HOST_BUF_NOT_RDY, 'start' is 
> > between 128 and 256 B ahead of the disk. Otherwise, the host buffer is 
> > empty and 'start' is no more than 128 B ahead of the disk.
> >
> > > -		if (NCR5380_read(hostdata->c400_ctl_status) &
> > > -		    CSR_GATED_53C80_IRQ)
> > > -			break;
> > > -
> > >  		if (hostdata->io_port && hostdata->io_width == 2)
> > >  			outsw(hostdata->io_port + hostdata->c400_host_buf,
> > >  			      src + start, 64);
> > >
> > >
> > > DTC seems to work too.
> >
> > OK. Thanks for testing. Please try the patch below on top of v6.
> 
> It misses 256B blocks. It's caused by the timeouts, this patch fixes it:
> 
> --- a/drivers/scsi/g_NCR5380.c
> +++ b/drivers/scsi/g_NCR5380.c
> @@ -598,11 +598,9 @@ static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
>  		                           CSR_HOST_BUF_NOT_RDY, 0,
>  		                           hostdata->c400_ctl_status,
>  		                           CSR_GATED_53C80_IRQ,
> -		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0)
> -			break;
> -
> -		if (NCR5380_read(hostdata->c400_ctl_status) &
> -		    CSR_HOST_BUF_NOT_RDY) {
> +		                           CSR_GATED_53C80_IRQ, HZ / 64) < 0 ||
> +		    (NCR5380_read(hostdata->c400_ctl_status) &
> +		     CSR_HOST_BUF_NOT_RDY)) {
>  			/* Both 128 B buffers are in use */
>  			if (start >= 128)
>  				start -= 128;
> 

Thanks for this. I've included this in v7 because even if we fix the 
timeouts during normal operation, we still need this change for timeouts 
in abnormal situations.

-- 

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-07-03  8:01 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-01  2:40 [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Finn Thain
2017-07-01  2:40 ` [PATCH v6 1/6] g_NCR5380: Fix PDMA transfer size Finn Thain
2017-07-01  2:40 ` [PATCH v6 3/6] g_NCR5380: Cleanup comments and whitespace Finn Thain
2017-07-01  2:40 ` [PATCH v6 4/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive Finn Thain
2017-07-01  2:40 ` [PATCH v6 5/6] g_NCR5380: Re-work PDMA loops Finn Thain
2017-07-01  2:40 ` [PATCH v6 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection Finn Thain
2017-07-01  2:40 ` [PATCH v6 6/6] g_NCR5380: Various DTC436 workarounds Finn Thain
2017-07-01 21:49 ` [PATCH v6 0/6] g_NCR5380: PDMA fixes and cleanup Ondrej Zary
2017-07-02  3:11   ` Finn Thain
2017-07-02 14:51     ` Ondrej Zary
2017-07-03  8:01       ` Finn Thain

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).