All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks
  2006-02-01 15:56 [PATCHSET] libata: various fixes related to EH, take #3 Tejun Heo
                   ` (2 preceding siblings ...)
  2006-02-01 15:56 ` [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
@ 2006-02-01 15:56 ` Tejun Heo
  2006-02-09  6:33   ` Jeff Garzik
  3 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2006-02-01 15:56 UTC (permalink / raw)
  To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo

->eng_timeout is not invoked with NULL qc anymore.  Kill NULL qc
handling from all ->eng_timeout callbacks.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/ahci.c         |   12 +++---------
 drivers/scsi/libata-core.c  |   12 +-----------
 drivers/scsi/sata_mv.c      |    9 ++-------
 drivers/scsi/sata_promise.c |    9 +--------
 drivers/scsi/sata_sil24.c   |    5 -----
 drivers/scsi/sata_sx4.c     |    9 +--------
 6 files changed, 8 insertions(+), 48 deletions(-)

2ad8a077475d5453c7127723b2bf00a4a5083d81
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index c840d5e..85fcb5e 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -676,19 +676,13 @@ static void ahci_eng_timeout(struct ata_
 
 	spin_lock_irqsave(&host_set->lock, flags);
 
+	ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
 	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (!qc) {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-	} else {
-		ahci_restart_port(ap, readl(port_mmio + PORT_IRQ_STAT));
-		qc->err_mask |= AC_ERR_TIMEOUT;
-	}
+	qc->err_mask |= AC_ERR_TIMEOUT;
 
 	spin_unlock_irqrestore(&host_set->lock, flags);
 
-	if (qc)
-		ata_eh_qc_complete(qc);
+	ata_eh_qc_complete(qc);
 }
 
 static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 9a785cf..32ac63c 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3873,20 +3873,10 @@ static void ata_qc_timeout(struct ata_qu
 
 void ata_eng_timeout(struct ata_port *ap)
 {
-	struct ata_queued_cmd *qc;
-
 	DPRINTK("ENTER\n");
 
-	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (qc)
-		ata_qc_timeout(qc);
-	else {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-		goto out;
-	}
+	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
 
-out:
 	DPRINTK("EXIT\n");
 }
 
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index b55dd83..0fe2e0c 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -2020,13 +2020,8 @@ static void mv_eng_timeout(struct ata_po
 	mv_err_intr(ap);
 	mv_stop_and_reset(ap);
 
-	if (!qc) {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-	} else {
-		qc->err_mask |= AC_ERR_TIMEOUT;
-		ata_eh_qc_complete(qc);
-	}
+	qc->err_mask |= AC_ERR_TIMEOUT;
+	ata_eh_qc_complete(qc);
 }
 
 /**
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index 0950a8e..1221463 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -431,11 +431,6 @@ static void pdc_eng_timeout(struct ata_p
 	spin_lock_irqsave(&host_set->lock, flags);
 
 	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (!qc) {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-		goto out;
-	}
 
 	switch (qc->tf.protocol) {
 	case ATA_PROT_DMA:
@@ -455,10 +450,8 @@ static void pdc_eng_timeout(struct ata_p
 		break;
 	}
 
-out:
 	spin_unlock_irqrestore(&host_set->lock, flags);
-	if (qc)
-		ata_eh_qc_complete(qc);
+	ata_eh_qc_complete(qc);
 	DPRINTK("EXIT\n");
 }
 
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index 7222fc7..1160fda 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -638,11 +638,6 @@ static void sil24_eng_timeout(struct ata
 	struct ata_queued_cmd *qc;
 
 	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (!qc) {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-		return;
-	}
 
 	printk(KERN_ERR "ata%u: command timeout\n", ap->id);
 	qc->err_mask |= AC_ERR_TIMEOUT;
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index 9f992fb..b4ffa3f 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -866,11 +866,6 @@ static void pdc_eng_timeout(struct ata_p
 	spin_lock_irqsave(&host_set->lock, flags);
 
 	qc = ata_qc_from_tag(ap, ap->active_tag);
-	if (!qc) {
-		printk(KERN_ERR "ata%u: BUG: timeout without command\n",
-		       ap->id);
-		goto out;
-	}
 
 	switch (qc->tf.protocol) {
 	case ATA_PROT_DMA:
@@ -889,10 +884,8 @@ static void pdc_eng_timeout(struct ata_p
 		break;
 	}
 
-out:
 	spin_unlock_irqrestore(&host_set->lock, flags);
-	if (qc)
-		ata_eh_qc_complete(qc);
+	ata_eh_qc_complete(qc);
 	DPRINTK("EXIT\n");
 }
 
-- 
1.1.3



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCHSET] libata: various fixes related to EH, take #3
@ 2006-02-01 15:56 Tejun Heo
  2006-02-01 15:56 ` [PATCH 2/4] libata: EH / pio tasks synchronization Tejun Heo
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Tejun Heo @ 2006-02-01 15:56 UTC (permalink / raw)
  To: jgarzik, linux-ide, albertcc; +Cc: htejun


Hello, Jeff.

This patchset is what's left of the previous take [1].  From the
previous take, patch #9 and #10 were rejected and #11 and #13 were
accepted but failed to apply due to the earlier rejection.  I still
think patch #9 and #10 are correct and gave my rationale in the
original thread [2].

This patchset contains those same patches - #9, #10, #11 and #13, but
they have been reordered to ease integration.  #11 and #13 are now at
the head.

Thanks.

--
tejun

[1] http://marc.theaimsgroup.com/?l=linux-ide&m=113798939526779&w=2
[2] http://marc.theaimsgroup.com/?l=linux-ide&m=113880834210485&w=2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag
  2006-02-01 15:56 [PATCHSET] libata: various fixes related to EH, take #3 Tejun Heo
  2006-02-01 15:56 ` [PATCH 2/4] libata: EH / pio tasks synchronization Tejun Heo
  2006-02-01 15:56 ` [PATCH 3/4] libata: fix handling of race between timeout and completion Tejun Heo
@ 2006-02-01 15:56 ` Tejun Heo
  2006-02-09  6:18   ` Jeff Garzik
  2006-02-01 15:56 ` [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
  3 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2006-02-01 15:56 UTC (permalink / raw)
  To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo

ATA_FLAG_IN_EH flag is set on entry to EH and cleared on completion.
This patch just sets and clears the flag.  Following patches will
build normal qc execution / EH synchronization aroung this flag.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-scsi.c |    9 +++++++++
 include/linux/libata.h     |    2 ++
 2 files changed, 11 insertions(+), 0 deletions(-)

402b1d5c510fe7f9ad0457bcedb3a4742e79d649
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 6df8293..0e14259 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -735,6 +735,11 @@ int ata_scsi_error(struct Scsi_Host *hos
 
 	DPRINTK("ENTER\n");
 
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	assert(!(ap->flags & ATA_FLAG_IN_EH));
+	ap->flags |= ATA_FLAG_IN_EH;
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
 	ap = (struct ata_port *) &host->hostdata[0];
 	ap->ops->eng_timeout(ap);
 
@@ -742,6 +747,10 @@ int ata_scsi_error(struct Scsi_Host *hos
 
 	scsi_eh_flush_done_q(&ap->eh_done_q);
 
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	ap->flags &= ~ATA_FLAG_IN_EH;
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
 	DPRINTK("EXIT\n");
 	return 0;
 }
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 474cdfa..55176df 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -162,6 +162,8 @@ enum {
 	ATA_FLAG_PIO_LBA48	= (1 << 13), /* Host DMA engine is LBA28 only */
 	ATA_FLAG_IRQ_MASK	= (1 << 14), /* Mask IRQ in PIO xfers */
 
+	ATA_FLAG_IN_EH		= (1 << 15), /* EH in progress */
+
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
 	ATA_QCFLAG_SINGLE	= (1 << 4), /* no s/g, just a single buffer */
-- 
1.1.3



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] libata: fix handling of race between timeout and completion
  2006-02-01 15:56 [PATCHSET] libata: various fixes related to EH, take #3 Tejun Heo
  2006-02-01 15:56 ` [PATCH 2/4] libata: EH / pio tasks synchronization Tejun Heo
@ 2006-02-01 15:56 ` Tejun Heo
  2006-02-09  6:33   ` Jeff Garzik
  2006-02-01 15:56 ` [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
  2006-02-01 15:56 ` [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
  3 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2006-02-01 15:56 UTC (permalink / raw)
  To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo

If a qc completes after SCSI timer expires but before libata EH kicks
in, the qc gets completed but the scsicmd still gets passed to libata
EH resulting in ->eng_timeout invocation with NULL qc.  Currently none
of ->eng_timeout callbacks handles this properly.  This patch makes
ata_scsi_error() bypass ->eng_timeout and handle this rare case.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-scsi.c |   39 ++++++++++++++++++++++++++++++++++++---
 1 files changed, 36 insertions(+), 3 deletions(-)

22f1716710352b49e5a1598b0c4efdebfa33014b
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 0e14259..9435645 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -731,17 +731,50 @@ int ata_scsi_slave_config(struct scsi_de
 
 int ata_scsi_error(struct Scsi_Host *host)
 {
-	struct ata_port *ap;
+	struct ata_port *ap = (struct ata_port *) &host->hostdata[0];
+	struct ata_queued_cmd *qc;
+	unsigned long flags;
 
 	DPRINTK("ENTER\n");
 
 	spin_lock_irqsave(&ap->host_set->lock, flags);
+	qc = ata_qc_from_tag(ap, ap->active_tag);
 	assert(!(ap->flags & ATA_FLAG_IN_EH));
 	ap->flags |= ATA_FLAG_IN_EH;
 	spin_unlock_irqrestore(&ap->host_set->lock, flags);
 
-	ap = (struct ata_port *) &host->hostdata[0];
-	ap->ops->eng_timeout(ap);
+	if (qc) {
+		ap->ops->eng_timeout(ap);
+	} else {
+		struct scsi_cmnd *scmd;
+		unsigned char *sb;
+
+		/* The scmd had timed out but the corresponding qc
+		 * completed successfully inbetween timer expiration
+		 * and here.  Retry if possible.
+		 *
+		 * It is better to enter eng_timeout and perform EH
+		 * before retrying the command, but this case should
+		 * be _very_ rare and eng_timeout isn't ready for
+		 * NULL-qc case.
+		 */
+		scmd = list_entry(host->eh_cmd_q.next,
+				  struct scsi_cmnd, eh_entry);
+		sb = scmd->sense_buffer;
+
+		/* Timeout, fake parity for now */
+		scmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+		sb[0] = 0x70;
+		sb[7] = 0x0a;
+		sb[2] = ABORTED_COMMAND;
+		sb[12] = 0x47;
+		sb[13] = 0x00;
+
+		printk(KERN_WARNING "ata%u: interrupt and timer raced for "
+		       "scsicmd %p\n", ap->id, scmd);
+
+		scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
+	}
 
 	assert(host->host_failed == 0 && list_empty(&host->eh_cmd_q));
 
-- 
1.1.3



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] libata: EH / pio tasks synchronization
  2006-02-01 15:56 [PATCHSET] libata: various fixes related to EH, take #3 Tejun Heo
@ 2006-02-01 15:56 ` Tejun Heo
  2006-02-01 15:56 ` [PATCH 3/4] libata: fix handling of race between timeout and completion Tejun Heo
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Tejun Heo @ 2006-02-01 15:56 UTC (permalink / raw)
  To: jgarzik, linux-ide, albertcc; +Cc: Tejun Heo

This patch makes sure that pio tasks are flushed before proceeding
with EH.

Signed-off-by: Tejun Heo <htejun@gmail.com>

---

 drivers/scsi/libata-core.c |   56 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/libata.h     |    3 ++
 2 files changed, 55 insertions(+), 4 deletions(-)

8bf3455f64987bfb33a304113b98085f67e5ef62
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 249e67f..9a785cf 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1074,19 +1074,66 @@ static unsigned int ata_pio_modes(const 
 static inline void
 ata_queue_packet_task(struct ata_port *ap)
 {
-	queue_work(ata_wq, &ap->packet_task);
+	if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+		queue_work(ata_wq, &ap->packet_task);
 }
 
 static inline void
 ata_queue_pio_task(struct ata_port *ap)
 {
-	queue_work(ata_wq, &ap->pio_task);
+	if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+		queue_work(ata_wq, &ap->pio_task);
 }
 
 static inline void
 ata_queue_delayed_pio_task(struct ata_port *ap, unsigned long delay)
 {
-	queue_delayed_work(ata_wq, &ap->pio_task, delay);
+	if (!(ap->flags & ATA_FLAG_FLUSH_PIO_TASK))
+		queue_delayed_work(ata_wq, &ap->pio_task, delay);
+}
+
+/**
+ *	ata_flush_pio_tasks - Flush pio_task and packet_task
+ *	@ap: the target ata_port
+ *
+ *	After this function completes, pio_task and packet_task are
+ *	guranteed not to be running or scheduled.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+
+static void ata_flush_pio_tasks(struct ata_port *ap)
+{
+	int tmp = 0;
+	unsigned long flags;
+
+	DPRINTK("ENTER\n");
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	ap->flags |= ATA_FLAG_FLUSH_PIO_TASK;
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	DPRINTK("flush #1\n");
+	flush_workqueue(ata_wq);
+
+	/*
+	 * At this point, if a task is running, it's guaranteed to see
+	 * the FLUSH flag; thus, it will never queue pio tasks again.
+	 * Cancel and flush.
+	 */
+	tmp |= cancel_delayed_work(&ap->pio_task);
+	tmp |= cancel_delayed_work(&ap->packet_task);
+	if (!tmp) {
+		DPRINTK("flush #2\n");
+		flush_workqueue(ata_wq);
+	}
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	ap->flags &= ~ATA_FLAG_FLUSH_PIO_TASK;
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	DPRINTK("EXIT\n");
 }
 
 void ata_qc_complete_internal(struct ata_queued_cmd *qc)
@@ -3767,6 +3814,9 @@ static void ata_qc_timeout(struct ata_qu
 
 	DPRINTK("ENTER\n");
 
+	ata_flush_pio_tasks(ap);
+	ap->hsm_task_state = HSM_ST_IDLE;
+
 	spin_lock_irqsave(&host_set->lock, flags);
 
 	switch (qc->tf.protocol) {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 55176df..f4cd1eb 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -162,7 +162,8 @@ enum {
 	ATA_FLAG_PIO_LBA48	= (1 << 13), /* Host DMA engine is LBA28 only */
 	ATA_FLAG_IRQ_MASK	= (1 << 14), /* Mask IRQ in PIO xfers */
 
-	ATA_FLAG_IN_EH		= (1 << 15), /* EH in progress */
+	ATA_FLAG_FLUSH_PIO_TASK	= (1 << 15), /* Flush PIO task */
+	ATA_FLAG_IN_EH		= (1 << 16), /* EH in progress */
 
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
-- 
1.1.3



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag
  2006-02-01 15:56 ` [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
@ 2006-02-09  6:18   ` Jeff Garzik
  0 siblings, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2006-02-09  6:18 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-ide, albertcc

Tejun Heo wrote:
> ATA_FLAG_IN_EH flag is set on entry to EH and cleared on completion.
> This patch just sets and clears the flag.  Following patches will
> build normal qc execution / EH synchronization aroung this flag.
> 
> Signed-off-by: Tejun Heo <htejun@gmail.com>

applied 1-2



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] libata: fix handling of race between timeout and completion
  2006-02-01 15:56 ` [PATCH 3/4] libata: fix handling of race between timeout and completion Tejun Heo
@ 2006-02-09  6:33   ` Jeff Garzik
  2006-02-09  9:08     ` Tejun
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Garzik @ 2006-02-09  6:33 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-ide, albertcc

Tejun Heo wrote:
> If a qc completes after SCSI timer expires but before libata EH kicks
> in, the qc gets completed but the scsicmd still gets passed to libata
> EH resulting in ->eng_timeout invocation with NULL qc.  Currently none
> of ->eng_timeout callbacks handles this properly.  This patch makes
> ata_scsi_error() bypass ->eng_timeout and handle this rare case.
> 
> Signed-off-by: Tejun Heo <htejun@gmail.com>

OK in general (I acknowledge the problem you point out), but NAK for 
this patch.


> +		scmd = list_entry(host->eh_cmd_q.next,
> +				  struct scsi_cmnd, eh_entry);
> +		sb = scmd->sense_buffer;
> +
> +		/* Timeout, fake parity for now */
> +		scmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
> +		sb[0] = 0x70;
> +		sb[7] = 0x0a;
> +		sb[2] = ABORTED_COMMAND;
> +		sb[12] = 0x47;
> +		sb[13] = 0x00;
> +
> +		printk(KERN_WARNING "ata%u: interrupt and timer raced for "
> +		       "scsicmd %p\n", ap->id, scmd);
> +
> +		scsi_eh_finish_cmd(scmd, &ap->eh_done_q);

OK in general, but I disagree with the handling of the qc==NULL case.

If you hit the "if scsi timer already fired" shortcut in scsi_done(), 
that demonstrates clear intent to complete the scsi command.  Thus, when 
libata EH handling starts, our only task for that scsi command is to 
complete it.

Signalling an aborted command stomps all over the current, valid SCSI 
command results.

As a side note, this area of code is part of the reason why I was 
thinking I wanted ...FLAG_EH_TIMEOUT.  My thought was that libata sets 
that in ->eh_timed_out(). ata_qc_complete() would check that flag, and 
refuse to call __ata_qc_complete() if it was set.  Doing so causes both 
the qc and the scsi command to be completed inside the EH handler.  But 
that's just an off-the-cuff thought...

	Jeff



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks
  2006-02-01 15:56 ` [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
@ 2006-02-09  6:33   ` Jeff Garzik
  0 siblings, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2006-02-09  6:33 UTC (permalink / raw)
  To: Tejun Heo; +Cc: linux-ide, albertcc

Tejun Heo wrote:
> ->eng_timeout is not invoked with NULL qc anymore.  Kill NULL qc
> handling from all ->eng_timeout callbacks.
> 
> Signed-off-by: Tejun Heo <htejun@gmail.com>
> 
> ---
> 
>  drivers/scsi/ahci.c         |   12 +++---------
>  drivers/scsi/libata-core.c  |   12 +-----------
>  drivers/scsi/sata_mv.c      |    9 ++-------
>  drivers/scsi/sata_promise.c |    9 +--------
>  drivers/scsi/sata_sil24.c   |    5 -----
>  drivers/scsi/sata_sx4.c     |    9 +--------
>  6 files changed, 8 insertions(+), 48 deletions(-)

ACK, but depends on NAK'd #3 of course



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] libata: fix handling of race between timeout and completion
  2006-02-09  6:33   ` Jeff Garzik
@ 2006-02-09  9:08     ` Tejun
  0 siblings, 0 replies; 9+ messages in thread
From: Tejun @ 2006-02-09  9:08 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: linux-ide, albertcc

Jeff Garzik wrote:
> Tejun Heo wrote:
> 
>> If a qc completes after SCSI timer expires but before libata EH kicks
>> in, the qc gets completed but the scsicmd still gets passed to libata
>> EH resulting in ->eng_timeout invocation with NULL qc.  Currently none
>> of ->eng_timeout callbacks handles this properly.  This patch makes
>> ata_scsi_error() bypass ->eng_timeout and handle this rare case.
>>
>> Signed-off-by: Tejun Heo <htejun@gmail.com>
> 
> 
> OK in general (I acknowledge the problem you point out), but NAK for 
> this patch.
> 
> 
>> +        scmd = list_entry(host->eh_cmd_q.next,
>> +                  struct scsi_cmnd, eh_entry);
>> +        sb = scmd->sense_buffer;
>> +
>> +        /* Timeout, fake parity for now */
>> +        scmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
>> +        sb[0] = 0x70;
>> +        sb[7] = 0x0a;
>> +        sb[2] = ABORTED_COMMAND;
>> +        sb[12] = 0x47;
>> +        sb[13] = 0x00;
>> +
>> +        printk(KERN_WARNING "ata%u: interrupt and timer raced for "
>> +               "scsicmd %p\n", ap->id, scmd);
>> +
>> +        scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
> 
> 
> OK in general, but I disagree with the handling of the qc==NULL case.
> 
> If you hit the "if scsi timer already fired" shortcut in scsi_done(), 
> that demonstrates clear intent to complete the scsi command.  Thus, when 
> libata EH handling starts, our only task for that scsi command is to 
> complete it.
> 
> Signalling an aborted command stomps all over the current, valid SCSI 
> command results.

Good day, Jeff.

I tried that but the problem is that if scsi timeout expires and then qc 
completes before eh kicks in, we lost some of completion information and 
thus I figured aborting (thus retrying) the commands is the way to go, 
but you're right.  The scsi status and stuff are recorded in scmd and we 
should honor those.  Thanks for pointing out.

> As a side note, this area of code is part of the reason why I was 
> thinking I wanted ...FLAG_EH_TIMEOUT.  My thought was that libata sets 
> that in ->eh_timed_out(). ata_qc_complete() would check that flag, and 
> refuse to call __ata_qc_complete() if it was set.  Doing so causes both 
> the qc and the scsi command to be completed inside the EH handler.  But 
> that's just an off-the-cuff thought...

Hmmm... right.  I'm not very sure about how the synchronization should 
be done, but if it can be done that way, that sounds much better than my 
dangling scmd handling hack.  I'll give it a try.

-- 
tejun

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2006-02-09  9:08 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-01 15:56 [PATCHSET] libata: various fixes related to EH, take #3 Tejun Heo
2006-02-01 15:56 ` [PATCH 2/4] libata: EH / pio tasks synchronization Tejun Heo
2006-02-01 15:56 ` [PATCH 3/4] libata: fix handling of race between timeout and completion Tejun Heo
2006-02-09  6:33   ` Jeff Garzik
2006-02-09  9:08     ` Tejun
2006-02-01 15:56 ` [PATCH 1/4] libata: implement ATA_FLAG_IN_EH port flag Tejun Heo
2006-02-09  6:18   ` Jeff Garzik
2006-02-01 15:56 ` [PATCH 4/4] libata: kill NULL qc handling from ->eng_timeout callbacks Tejun Heo
2006-02-09  6:33   ` Jeff Garzik

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.