* [PATCH 1/4] sym53c8xx_2: move PCI EEH handling to host reset
2021-08-19 9:07 [PATCH 0/4] sym53c8xx_2: Fixes for SCSI EH rework Hannes Reinecke
@ 2021-08-19 9:07 ` Hannes Reinecke
2021-08-20 5:13 ` Christoph Hellwig
2021-08-19 9:07 ` [PATCH 2/4] sym53c8xx_2: split off host reset from sym_eh_handler() Hannes Reinecke
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Hannes Reinecke @ 2021-08-19 9:07 UTC (permalink / raw)
To: Martin K. Petersen
Cc: Christoph Hellwig, James Bottomley, Matthew Wilcox, linux-scsi,
Hannes Reinecke
When PCI EEH is active the entire card will be reset; which means
that we _have_ to do the equivalent of a eh_host_reset().
So trying to do it from other EH callbacks is pointless, and we
should delegate it to eh_host_reset().
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/sym53c8xx_2/sym_glue.c | 97 ++++++++++++++++++-----------
1 file changed, 60 insertions(+), 37 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 16b65fc4405c..dafdab5869fa 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -569,12 +569,11 @@ static void sym53c8xx_timer(struct timer_list *t)
* Generic method for our eh processing.
* The 'op' argument tells what we have to do.
*/
-static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
+static int sym_eh_handler(struct Scsi_Host *shost, int op,
+ char *opname, struct scsi_cmnd *cmd)
{
struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd);
- struct Scsi_Host *shost = cmd->device->host;
struct sym_data *sym_data = shost_priv(shost);
- struct pci_dev *pdev = sym_data->pdev;
struct sym_hcb *np = sym_data->ncb;
SYM_QUEHEAD *qp;
int cmd_queued = 0;
@@ -583,36 +582,6 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
scmd_printk(KERN_WARNING, cmd, "%s operation started\n", opname);
- /* We may be in an error condition because the PCI bus
- * went down. In this case, we need to wait until the
- * PCI bus is reset, the card is reset, and only then
- * proceed with the scsi error recovery. There's no
- * point in hurrying; take a leisurely wait.
- */
-#define WAIT_FOR_PCI_RECOVERY 35
- if (pci_channel_offline(pdev)) {
- int finished_reset = 0;
- init_completion(&eh_done);
- spin_lock_irq(shost->host_lock);
- /* Make sure we didn't race */
- if (pci_channel_offline(pdev)) {
- BUG_ON(sym_data->io_reset);
- sym_data->io_reset = &eh_done;
- } else {
- finished_reset = 1;
- }
- spin_unlock_irq(shost->host_lock);
- if (!finished_reset)
- finished_reset = wait_for_completion_timeout
- (sym_data->io_reset,
- WAIT_FOR_PCI_RECOVERY*HZ);
- spin_lock_irq(shost->host_lock);
- sym_data->io_reset = NULL;
- spin_unlock_irq(shost->host_lock);
- if (!finished_reset)
- return SCSI_FAILED;
- }
-
spin_lock_irq(shost->host_lock);
/* This one is queued in some place -> to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -672,22 +641,76 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
*/
static int sym53c8xx_eh_abort_handler(struct scsi_cmnd *cmd)
{
- return sym_eh_handler(SYM_EH_ABORT, "ABORT", cmd);
+ struct Scsi_Host *shost = cmd->device->host;
+ struct sym_data *sym_data = shost_priv(shost);
+ struct pci_dev *pdev = sym_data->pdev;
+
+ if (pci_channel_offline(pdev))
+ return FAILED;
+ return sym_eh_handler(shost, SYM_EH_ABORT, "ABORT", cmd);
}
static int sym53c8xx_eh_device_reset_handler(struct scsi_cmnd *cmd)
{
- return sym_eh_handler(SYM_EH_DEVICE_RESET, "DEVICE RESET", cmd);
+ struct Scsi_Host *shost = cmd->device->host;
+ struct sym_data *sym_data = shost_priv(shost);
+ struct pci_dev *pdev = sym_data->pdev;
+
+ if (pci_channel_offline(pdev))
+ return FAILED;
+ return sym_eh_handler(shost, SYM_EH_DEVICE_RESET, "DEVICE RESET", cmd);
}
static int sym53c8xx_eh_bus_reset_handler(struct scsi_cmnd *cmd)
{
- return sym_eh_handler(SYM_EH_BUS_RESET, "BUS RESET", cmd);
+ struct Scsi_Host *shost = cmd->device->host;
+ struct sym_data *sym_data = shost_priv(shost);
+ struct pci_dev *pdev = sym_data->pdev;
+
+ if (pci_channel_offline(pdev))
+ return FAILED;
+ return sym_eh_handler(shost, SYM_EH_BUS_RESET, "BUS RESET", cmd);
}
static int sym53c8xx_eh_host_reset_handler(struct scsi_cmnd *cmd)
{
- return sym_eh_handler(SYM_EH_HOST_RESET, "HOST RESET", cmd);
+ struct Scsi_Host *shost = cmd->device->host;
+ struct sym_data *sym_data = shost_priv(shost);
+ struct pci_dev *pdev = sym_data->pdev;
+
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. There's no
+ * point in hurrying; take a leisurely wait.
+ */
+#define WAIT_FOR_PCI_RECOVERY 35
+ if (pci_channel_offline(pdev)) {
+ struct completion eh_done;
+ int finished_reset = 0;
+
+ init_completion(&eh_done);
+ spin_lock_irq(shost->host_lock);
+ /* Make sure we didn't race */
+ if (pci_channel_offline(pdev)) {
+ BUG_ON(sym_data->io_reset);
+ sym_data->io_reset = &eh_done;
+ } else {
+ finished_reset = 1;
+ }
+ spin_unlock_irq(shost->host_lock);
+ if (!finished_reset)
+ finished_reset = wait_for_completion_timeout
+ (sym_data->io_reset,
+ WAIT_FOR_PCI_RECOVERY*HZ);
+ spin_lock_irq(shost->host_lock);
+ sym_data->io_reset = NULL;
+ spin_unlock_irq(shost->host_lock);
+ if (!finished_reset)
+ return SCSI_FAILED;
+ }
+
+ return sym_eh_handler(shost, SYM_EH_HOST_RESET, "HOST RESET", cmd);
}
/*
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 1/4] sym53c8xx_2: move PCI EEH handling to host reset
2021-08-19 9:07 ` [PATCH 1/4] sym53c8xx_2: move PCI EEH handling to host reset Hannes Reinecke
@ 2021-08-20 5:13 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2021-08-20 5:13 UTC (permalink / raw)
To: Hannes Reinecke
Cc: Martin K. Petersen, Christoph Hellwig, James Bottomley,
Matthew Wilcox, linux-scsi
On Thu, Aug 19, 2021 at 11:07:13AM +0200, Hannes Reinecke wrote:
> When PCI EEH is active the entire card will be reset; which means
> that we _have_ to do the equivalent of a eh_host_reset().
> So trying to do it from other EH callbacks is pointless, and we
> should delegate it to eh_host_reset().
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/4] sym53c8xx_2: split off host reset from sym_eh_handler()
2021-08-19 9:07 [PATCH 0/4] sym53c8xx_2: Fixes for SCSI EH rework Hannes Reinecke
2021-08-19 9:07 ` [PATCH 1/4] sym53c8xx_2: move PCI EEH handling to host reset Hannes Reinecke
@ 2021-08-19 9:07 ` Hannes Reinecke
2021-08-19 9:07 ` [PATCH 3/4] sym53c8xx_2: split off bus " Hannes Reinecke
2021-08-19 9:07 ` [PATCH 4/4] sym53c8xx_2: rework reset handling Hannes Reinecke
3 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2021-08-19 9:07 UTC (permalink / raw)
To: Martin K. Petersen
Cc: Christoph Hellwig, James Bottomley, Matthew Wilcox, linux-scsi,
Hannes Reinecke
For host reset we need to wait for all pending command to complete;
there might be more than one command queued and we'll need to wait
for all of them.
So split off the host reset functionality from sym_eh_handler()
and add a wait loop for all commands.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/sym53c8xx_2/sym_glue.c | 42 ++++++++++++++++++++++++-----
1 file changed, 35 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index dafdab5869fa..86cba915b62d 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -563,7 +563,27 @@ static void sym53c8xx_timer(struct timer_list *t)
#define SYM_EH_ABORT 0
#define SYM_EH_DEVICE_RESET 1
#define SYM_EH_BUS_RESET 2
-#define SYM_EH_HOST_RESET 3
+
+static int sym_eh_wait_for_commands(struct Scsi_Host *shost)
+{
+ struct sym_data *sym_data = shost_priv(shost);
+ struct sym_hcb *np = sym_data->ncb;
+ int rval = SUCCESS, count = 10; /* 5 seconds */
+
+ /* Wait for any queued commands to complete */
+ spin_lock_irq(shost->host_lock);
+ while (!sym_que_empty(&np->busy_ccbq)) {
+ if (--count == 0) {
+ rval = FAILED;
+ break;
+ }
+ spin_unlock_irq(shost->host_lock);
+ msleep(500);
+ spin_lock_irq(shost->host_lock);
+ }
+ spin_unlock_irq(shost->host_lock);
+ return rval;
+}
/*
* Generic method for our eh processing.
@@ -605,11 +625,6 @@ static int sym_eh_handler(struct Scsi_Host *shost, int op,
sym_reset_scsi_bus(np, 1);
sts = 0;
break;
- case SYM_EH_HOST_RESET:
- sym_reset_scsi_bus(np, 0);
- sym_start_up(shost, 1);
- sts = 0;
- break;
default:
break;
}
@@ -677,6 +692,10 @@ static int sym53c8xx_eh_host_reset_handler(struct scsi_cmnd *cmd)
struct Scsi_Host *shost = cmd->device->host;
struct sym_data *sym_data = shost_priv(shost);
struct pci_dev *pdev = sym_data->pdev;
+ struct sym_hcb *np = sym_data->ncb;
+ int rval = SUCCESS;
+
+ shost_printk(KERN_WARNING, shost, "HOST RESET operation started\n");
/* We may be in an error condition because the PCI bus
* went down. In this case, we need to wait until the
@@ -710,7 +729,16 @@ static int sym53c8xx_eh_host_reset_handler(struct scsi_cmnd *cmd)
return SCSI_FAILED;
}
- return sym_eh_handler(shost, SYM_EH_HOST_RESET, "HOST RESET", cmd);
+ spin_lock_irq(shost->host_lock);
+ sym_reset_scsi_bus(np, 0);
+ sym_start_up(shost, 1);
+ spin_unlock_irq(shost->host_lock);
+
+ rval = sym_eh_wait_for_commands(shost);
+
+ shost_printk(KERN_WARNING, shost, "HOST RESET operation %s.\n",
+ rval == SUCCESS ? "complete" : "failed");
+ return rval;
}
/*
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/4] sym53c8xx_2: split off bus reset from sym_eh_handler()
2021-08-19 9:07 [PATCH 0/4] sym53c8xx_2: Fixes for SCSI EH rework Hannes Reinecke
2021-08-19 9:07 ` [PATCH 1/4] sym53c8xx_2: move PCI EEH handling to host reset Hannes Reinecke
2021-08-19 9:07 ` [PATCH 2/4] sym53c8xx_2: split off host reset from sym_eh_handler() Hannes Reinecke
@ 2021-08-19 9:07 ` Hannes Reinecke
2021-08-19 9:07 ` [PATCH 4/4] sym53c8xx_2: rework reset handling Hannes Reinecke
3 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2021-08-19 9:07 UTC (permalink / raw)
To: Martin K. Petersen
Cc: Christoph Hellwig, James Bottomley, Matthew Wilcox, linux-scsi,
Hannes Reinecke
Bus reset is similar to host reset, and we also need to wait for
all commands to complete. So split off bus reset from sym_eh_handler()
and wait for all commands to complete after resetting the SCSI bus.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/scsi/sym53c8xx_2/sym_glue.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 86cba915b62d..817b2584b4aa 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -562,7 +562,6 @@ static void sym53c8xx_timer(struct timer_list *t)
*/
#define SYM_EH_ABORT 0
#define SYM_EH_DEVICE_RESET 1
-#define SYM_EH_BUS_RESET 2
static int sym_eh_wait_for_commands(struct Scsi_Host *shost)
{
@@ -621,10 +620,6 @@ static int sym_eh_handler(struct Scsi_Host *shost, int op,
case SYM_EH_DEVICE_RESET:
sts = sym_reset_scsi_target(np, cmd->device->id);
break;
- case SYM_EH_BUS_RESET:
- sym_reset_scsi_bus(np, 1);
- sts = 0;
- break;
default:
break;
}
@@ -681,10 +676,26 @@ static int sym53c8xx_eh_bus_reset_handler(struct scsi_cmnd *cmd)
struct Scsi_Host *shost = cmd->device->host;
struct sym_data *sym_data = shost_priv(shost);
struct pci_dev *pdev = sym_data->pdev;
+ struct sym_hcb *np = sym_data->ncb;
+ int rval = SUCCESS;
if (pci_channel_offline(pdev))
return FAILED;
- return sym_eh_handler(shost, SYM_EH_BUS_RESET, "BUS RESET", cmd);
+
+ shost_printk(KERN_WARNING, shost, "BUS RESET operation started\n");
+
+ spin_lock_irq(shost->host_lock);
+ if (sym_reset_scsi_bus(np, 1))
+ rval = FAILED;
+ spin_unlock_irq(shost->host_lock);
+
+ if (rval == SUCCESS)
+ rval = sym_eh_wait_for_commands(shost);
+
+ shost_printk(KERN_WARNING, shost, "BUS RESET operation %s.\n",
+ rval == SUCCESS ? "complete" : "failed");
+
+ return rval;
}
static int sym53c8xx_eh_host_reset_handler(struct scsi_cmnd *cmd)
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/4] sym53c8xx_2: rework reset handling
2021-08-19 9:07 [PATCH 0/4] sym53c8xx_2: Fixes for SCSI EH rework Hannes Reinecke
` (2 preceding siblings ...)
2021-08-19 9:07 ` [PATCH 3/4] sym53c8xx_2: split off bus " Hannes Reinecke
@ 2021-08-19 9:07 ` Hannes Reinecke
3 siblings, 0 replies; 6+ messages in thread
From: Hannes Reinecke @ 2021-08-19 9:07 UTC (permalink / raw)
To: Martin K. Petersen
Cc: Christoph Hellwig, James Bottomley, Matthew Wilcox, linux-scsi,
Hannes Reinecke, Hannes Reinecke
Split off the combined abort and device reset handling into
distinct functions.
And the current device reset handler really is a target reset,
so rename it.
Signed-off-by: Hannes Reinecke <hare@suse.com>
---
drivers/scsi/sym53c8xx_2/sym_glue.c | 86 ++++++++++++++++-------------
1 file changed, 49 insertions(+), 37 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 817b2584b4aa..8773fbdfd559 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -585,13 +585,12 @@ static int sym_eh_wait_for_commands(struct Scsi_Host *shost)
}
/*
- * Generic method for our eh processing.
- * The 'op' argument tells what we have to do.
+ * Error handlers called from the eh thread (one thread per HBA).
*/
-static int sym_eh_handler(struct Scsi_Host *shost, int op,
- char *opname, struct scsi_cmnd *cmd)
+static int sym53c8xx_eh_abort_handler(struct scsi_cmnd *cmd)
{
struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd);
+ struct Scsi_Host *shost = cmd->device->host;
struct sym_data *sym_data = shost_priv(shost);
struct sym_hcb *np = sym_data->ncb;
SYM_QUEHEAD *qp;
@@ -599,7 +598,7 @@ static int sym_eh_handler(struct Scsi_Host *shost, int op,
int sts = -1;
struct completion eh_done;
- scmd_printk(KERN_WARNING, cmd, "%s operation started\n", opname);
+ scmd_printk(KERN_WARNING, cmd, "ABORT operation started\n");
spin_lock_irq(shost->host_lock);
/* This one is queued in some place -> to wait for completion */
@@ -611,19 +610,7 @@ static int sym_eh_handler(struct Scsi_Host *shost, int op,
}
}
- /* Try to proceed the operation we have been asked for */
- sts = -1;
- switch(op) {
- case SYM_EH_ABORT:
- sts = sym_abort_scsiio(np, cmd, 1);
- break;
- case SYM_EH_DEVICE_RESET:
- sts = sym_reset_scsi_target(np, cmd->device->id);
- break;
- default:
- break;
- }
-
+ sts = sym_abort_scsiio(np, cmd, 1);
/* On error, restore everything and cross fingers :) */
if (sts)
cmd_queued = 0;
@@ -640,35 +627,60 @@ static int sym_eh_handler(struct Scsi_Host *shost, int op,
spin_unlock_irq(shost->host_lock);
}
- dev_warn(&cmd->device->sdev_gendev, "%s operation %s.\n", opname,
+ dev_warn(&cmd->device->sdev_gendev, "ABORT operation %s.\n",
sts==0 ? "complete" :sts==-2 ? "timed-out" : "failed");
return sts ? SCSI_FAILED : SCSI_SUCCESS;
}
-
-/*
- * Error handlers called from the eh thread (one thread per HBA).
- */
-static int sym53c8xx_eh_abort_handler(struct scsi_cmnd *cmd)
+static int sym53c8xx_eh_target_reset_handler(struct scsi_cmnd *cmd)
{
- struct Scsi_Host *shost = cmd->device->host;
+ struct scsi_target *starget = scsi_target(cmd->device);
+ struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
struct sym_data *sym_data = shost_priv(shost);
struct pci_dev *pdev = sym_data->pdev;
+ struct sym_hcb *np = sym_data->ncb;
+ SYM_QUEHEAD *qp;
+ int sts;
+ struct completion eh_done;
+ starget_printk(KERN_WARNING, starget,
+ "TARGET RESET operation started\n");
+
+ /*
+ * Escalate to host reset if the PCI bus went down
+ */
if (pci_channel_offline(pdev))
- return FAILED;
- return sym_eh_handler(shost, SYM_EH_ABORT, "ABORT", cmd);
-}
+ return SCSI_FAILED;
-static int sym53c8xx_eh_device_reset_handler(struct scsi_cmnd *cmd)
-{
- struct Scsi_Host *shost = cmd->device->host;
- struct sym_data *sym_data = shost_priv(shost);
- struct pci_dev *pdev = sym_data->pdev;
+ spin_lock_irq(shost->host_lock);
+ sts = sym_reset_scsi_target(np, starget->id);
+ if (!sts) {
+ FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
+ struct sym_ccb *cp = sym_que_entry(qp, struct sym_ccb,
+ link_ccbq);
+ struct scsi_cmnd *cmd = cp->cmd;
+ struct sym_ucmd *ucmd;
+
+ if (!cmd || cmd->device->channel != starget->channel ||
+ cmd->device->id != starget->id)
+ continue;
- if (pci_channel_offline(pdev))
- return FAILED;
- return sym_eh_handler(shost, SYM_EH_DEVICE_RESET, "DEVICE RESET", cmd);
+ ucmd = SYM_UCMD_PTR(cmd);
+ init_completion(&eh_done);
+ ucmd->eh_done = &eh_done;
+ spin_unlock_irq(shost->host_lock);
+ if (!wait_for_completion_timeout(&eh_done, 5*HZ)) {
+ ucmd->eh_done = NULL;
+ sts = -2;
+ }
+ spin_lock_irq(shost->host_lock);
+ }
+ }
+ spin_unlock_irq(shost->host_lock);
+
+ starget_printk(KERN_WARNING, starget, "TARGET RESET operation %s.\n",
+ sts==0 ? "complete" :sts==-2 ? "timed-out" : "failed");
+ return SCSI_SUCCESS;
}
static int sym53c8xx_eh_bus_reset_handler(struct scsi_cmnd *cmd)
@@ -1699,7 +1711,7 @@ static struct scsi_host_template sym2_template = {
.slave_configure = sym53c8xx_slave_configure,
.slave_destroy = sym53c8xx_slave_destroy,
.eh_abort_handler = sym53c8xx_eh_abort_handler,
- .eh_device_reset_handler = sym53c8xx_eh_device_reset_handler,
+ .eh_target_reset_handler = sym53c8xx_eh_target_reset_handler,
.eh_bus_reset_handler = sym53c8xx_eh_bus_reset_handler,
.eh_host_reset_handler = sym53c8xx_eh_host_reset_handler,
.this_id = 7,
--
2.29.2
^ permalink raw reply related [flat|nested] 6+ messages in thread