* [PATCH v6 1/5] Split scsi_internal_device_block()
2017-05-02 18:00 [PATCH v6 0/5] Avoid that __scsi_remove_device() hangs Bart Van Assche
@ 2017-05-02 18:00 ` Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 2/5] Create two versions of scsi_internal_device_unblock() Bart Van Assche
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2017-05-02 18:00 UTC (permalink / raw)
To: Martin K . Petersen, James Bottomley
Cc: linux-scsi, Bart Van Assche, Christoph Hellwig, Hannes Reinecke,
Johannes Thumshirn, Sreekanth Reddy
Instead of passing a "wait" argument to scsi_internal_device_block(),
split this function into a function that waits and a function that
doesn't wait. This will make it easier to serialize SCSI device state
changes through a mutex.
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
---
drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 +-
drivers/scsi/scsi_lib.c | 73 +++++++++++++++++++++++-------------
include/scsi/scsi_device.h | 2 +-
3 files changed, 50 insertions(+), 29 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 919ba2bb15f1..25e89cfe4417 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev,
sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev, false);
+ r = scsi_internal_device_block_nowait(sdev);
if (r == -EINVAL)
sdev_printk(KERN_WARNING, sdev,
"device_block failed with return(%d) for handle(0x%04x)\n",
@@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
"performing a block followed by an unblock\n",
r, sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 1;
- r = scsi_internal_device_block(sdev, false);
+ r = scsi_internal_device_block_nowait(sdev);
if (r)
sdev_printk(KERN_WARNING, sdev, "retried device_block "
"failed with return(%d) for handle(0x%04x)\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index b7b340c494ab..ca20d3702b45 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2937,28 +2937,20 @@ scsi_target_resume(struct scsi_target *starget)
EXPORT_SYMBOL(scsi_target_resume);
/**
- * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state
- * @sdev: device to block
- * @wait: Whether or not to wait until ongoing .queuecommand() /
- * .queue_rq() calls have finished.
+ * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
+ * @sdev: device to block
*
- * Block request made by scsi lld's to temporarily stop all
- * scsi commands on the specified device. May sleep.
+ * Pause SCSI command processing on the specified device. Does not sleep.
*
- * Returns zero if successful or error if not
+ * Returns zero if successful or a negative error code upon failure.
*
- * Notes:
- * This routine transitions the device to the SDEV_BLOCK state
- * (which must be a legal transition). When the device is in this
- * state, all commands are deferred until the scsi lld reenables
- * the device with scsi_device_unblock or device_block_tmo fires.
- *
- * To do: avoid that scsi_send_eh_cmnd() calls queuecommand() after
- * scsi_internal_device_block() has blocked a SCSI device and also
- * remove the rport mutex lock and unlock calls from srp_queuecommand().
+ * Notes:
+ * This routine transitions the device to the SDEV_BLOCK state (which must be
+ * a legal transition). When the device is in this state, command processing
+ * is paused until the device leaves the SDEV_BLOCK state. See also
+ * scsi_internal_device_unblock_nowait().
*/
-int
-scsi_internal_device_block(struct scsi_device *sdev, bool wait)
+int scsi_internal_device_block_nowait(struct scsi_device *sdev)
{
struct request_queue *q = sdev->request_queue;
unsigned long flags;
@@ -2978,21 +2970,50 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
* request queue.
*/
if (q->mq_ops) {
- if (wait)
- blk_mq_quiesce_queue(q);
- else
- blk_mq_stop_hw_queues(q);
+ blk_mq_stop_hw_queues(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- if (wait)
- scsi_wait_for_queuecommand(sdev);
}
return 0;
}
-EXPORT_SYMBOL_GPL(scsi_internal_device_block);
+EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);
+
+/**
+ * scsi_internal_device_block - try to transition to the SDEV_BLOCK state
+ * @sdev: device to block
+ *
+ * Pause SCSI command processing on the specified device and wait until all
+ * ongoing scsi_request_fn() / scsi_queue_rq() calls have finished. May sleep.
+ *
+ * Returns zero if successful or a negative error code upon failure.
+ *
+ * Note:
+ * This routine transitions the device to the SDEV_BLOCK state (which must be
+ * a legal transition). When the device is in this state, command processing
+ * is paused until the device leaves the SDEV_BLOCK state. See also
+ * scsi_internal_device_unblock().
+ *
+ * To do: avoid that scsi_send_eh_cmnd() calls queuecommand() after
+ * scsi_internal_device_block() has blocked a SCSI device and also
+ * remove the rport mutex lock and unlock calls from srp_queuecommand().
+ */
+static int scsi_internal_device_block(struct scsi_device *sdev)
+{
+ struct request_queue *q = sdev->request_queue;
+ int err;
+
+ err = scsi_internal_device_block_nowait(sdev);
+ if (err == 0) {
+ if (q->mq_ops)
+ blk_mq_quiesce_queue(q);
+ else
+ scsi_wait_for_queuecommand(sdev);
+ }
+ return err;
+}
/**
* scsi_internal_device_unblock - resume a device after a block request
@@ -3049,7 +3070,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
static void
device_block(struct scsi_device *sdev, void *data)
{
- scsi_internal_device_block(sdev, true);
+ scsi_internal_device_block(sdev);
}
static int
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 080c7ce9bae8..e2f43ae3e264 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -472,7 +472,7 @@ static inline int scsi_device_created(struct scsi_device *sdev)
sdev->sdev_state == SDEV_CREATED_BLOCK;
}
-int scsi_internal_device_block(struct scsi_device *sdev, bool wait);
+int scsi_internal_device_block_nowait(struct scsi_device *sdev);
int scsi_internal_device_unblock(struct scsi_device *sdev,
enum scsi_device_state new_state);
--
2.12.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v6 2/5] Create two versions of scsi_internal_device_unblock()
2017-05-02 18:00 [PATCH v6 0/5] Avoid that __scsi_remove_device() hangs Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 1/5] Split scsi_internal_device_block() Bart Van Assche
@ 2017-05-02 18:00 ` Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 3/5] Protect SCSI device state changes with a mutex Bart Van Assche
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2017-05-02 18:00 UTC (permalink / raw)
To: Martin K . Petersen, James Bottomley
Cc: linux-scsi, Bart Van Assche, Christoph Hellwig, Hannes Reinecke,
Johannes Thumshirn, Sreekanth Reddy
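Introduce scsi_internal_device_unblock_nowait() for callers that must
not sleep, such as the mpt3sas driver, and turn
scsi_internal_device_unblock() into a static function that is allowed
to sleep. This will make it easier to serialize SCSI device state
changes through a mutex.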
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
---
drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++--
drivers/scsi/scsi_lib.c | 46 +++++++++++++++++++++++++-----------
include/scsi/scsi_device.h | 4 ++--
3 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 25e89cfe4417..d671f6e6062c 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2883,7 +2883,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
sdev_printk(KERN_WARNING, sdev, "device_unblock and setting to running, "
"handle(0x%04x)\n", sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 0;
- r = scsi_internal_device_unblock(sdev, SDEV_RUNNING);
+ r = scsi_internal_device_unblock_nowait(sdev, SDEV_RUNNING);
if (r == -EINVAL) {
/* The device has been set to SDEV_RUNNING by SD layer during
* device addition but the request queue is still stopped by
@@ -2902,7 +2902,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev,
r, sas_device_priv_data->sas_target->handle);
sas_device_priv_data->block = 0;
- r = scsi_internal_device_unblock(sdev, SDEV_RUNNING);
+ r = scsi_internal_device_unblock_nowait(sdev, SDEV_RUNNING);
if (r)
sdev_printk(KERN_WARNING, sdev, "retried device_unblock"
" failed with return(%d) for handle(0x%04x)\n",
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ca20d3702b45..79bb05fa09d5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3016,24 +3016,22 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
}
/**
- * scsi_internal_device_unblock - resume a device after a block request
+ * scsi_internal_device_unblock_nowait - resume a device after a block request
* @sdev: device to resume
- * @new_state: state to set devices to after unblocking
+ * @new_state: state to set the device to after unblocking
*
- * Called by scsi lld's or the midlayer to restart the device queue
- * for the previously suspended scsi device. Called from interrupt or
- * normal process context.
+ * Restart the device queue for a previously suspended SCSI device. Does not
+ * sleep.
*
- * Returns zero if successful or error if not.
+ * Returns zero if successful or a negative error code upon failure.
*
- * Notes:
- * This routine transitions the device to the SDEV_RUNNING state
- * or to one of the offline states (which must be a legal transition)
- * allowing the midlayer to goose the queue for this device.
+ * Notes:
+ * This routine transitions the device to the SDEV_RUNNING state or to one of
+ * the offline states (which must be a legal transition) allowing the midlayer
+ * to goose the queue for this device.
*/
-int
-scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state)
+int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
+ enum scsi_device_state new_state)
{
struct request_queue *q = sdev->request_queue;
unsigned long flags;
@@ -3065,7 +3063,27 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
return 0;
}
-EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);
+EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);
+
+/**
+ * scsi_internal_device_unblock - resume a device after a block request
+ * @sdev: device to resume
+ * @new_state: state to set the device to after unblocking
+ *
+ * Restart the device queue for a previously suspended SCSI device. May sleep.
+ *
+ * Returns zero if successful or a negative error code upon failure.
+ *
+ * Notes:
+ * This routine transitions the device to the SDEV_RUNNING state or to one of
+ * the offline states (which must be a legal transition) allowing the midlayer
+ * to goose the queue for this device.
+ */
+static int scsi_internal_device_unblock(struct scsi_device *sdev,
+ enum scsi_device_state new_state)
+{
+ return scsi_internal_device_unblock_nowait(sdev, new_state);
+}
static void
device_block(struct scsi_device *sdev, void *data)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index e2f43ae3e264..bb784045ba71 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -473,8 +473,8 @@ static inline int scsi_device_created(struct scsi_device *sdev)
}
int scsi_internal_device_block_nowait(struct scsi_device *sdev);
-int scsi_internal_device_unblock(struct scsi_device *sdev,
- enum scsi_device_state new_state);
+int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
+ enum scsi_device_state new_state);
/* accessor functions for the SCSI parameters */
static inline int scsi_device_sync(struct scsi_device *sdev)
--
2.12.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v6 3/5] Protect SCSI device state changes with a mutex
2017-05-02 18:00 [PATCH v6 0/5] Avoid that __scsi_remove_device() hangs Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 1/5] Split scsi_internal_device_block() Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 2/5] Create two versions of scsi_internal_device_unblock() Bart Van Assche
@ 2017-05-02 18:00 ` Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 4/5] Introduce scsi_start_queue() Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 5/5] Make __scsi_remove_device go straight from BLOCKED to DEL Bart Van Assche
4 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2017-05-02 18:00 UTC (permalink / raw)
To: Martin K . Petersen, James Bottomley
Cc: linux-scsi, Bart Van Assche, Christoph Hellwig, Hannes Reinecke,
Johannes Thumshirn
Serialize SCSI device state changes through a new per-device mutex,
sdev->state_mutex. Enable this mechanism for all scsi_target_*block()
callers but not for the scsi_internal_device_unblock_nowait() calls
from the mpt3sas driver because that driver can call
scsi_internal_device_unblock_nowait() from atomic context.
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
---
drivers/scsi/scsi_error.c | 8 +++++++-
drivers/scsi/scsi_lib.c | 27 +++++++++++++++++++++------
drivers/scsi/scsi_scan.c | 16 +++++++++-------
drivers/scsi/scsi_sysfs.c | 24 +++++++++++++++++++-----
drivers/scsi/scsi_transport_srp.c | 7 ++++---
drivers/scsi/sd.c | 7 +++++--
include/scsi/scsi_device.h | 1 +
7 files changed, 66 insertions(+), 24 deletions(-)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f2cafae150bc..02f5f7f49885 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1696,11 +1696,17 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q,
struct list_head *done_q)
{
struct scsi_cmnd *scmd, *next;
+ struct scsi_device *sdev;
list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
sdev_printk(KERN_INFO, scmd->device, "Device offlined - "
"not ready after error recovery\n");
- scsi_device_set_state(scmd->device, SDEV_OFFLINE);
+ sdev = scmd->device;
+
+ mutex_lock(&sdev->state_mutex);
+ scsi_device_set_state(sdev, SDEV_OFFLINE);
+ mutex_unlock(&sdev->state_mutex);
+
if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
/*
* FIXME: Handle lost cmds.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 79bb05fa09d5..d2854558437d 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2875,7 +2875,12 @@ static void scsi_wait_for_queuecommand(struct scsi_device *sdev)
int
scsi_device_quiesce(struct scsi_device *sdev)
{
- int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+ int err;
+
+ mutex_lock(&sdev->state_mutex);
+ err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+ mutex_unlock(&sdev->state_mutex);
+
if (err)
return err;
@@ -2903,10 +2908,11 @@ void scsi_device_resume(struct scsi_device *sdev)
* so assume the state is being managed elsewhere (for example
* device deleted during suspend)
*/
- if (sdev->sdev_state != SDEV_QUIESCE ||
- scsi_device_set_state(sdev, SDEV_RUNNING))
- return;
- scsi_run_queue(sdev->request_queue);
+ mutex_lock(&sdev->state_mutex);
+ if (sdev->sdev_state == SDEV_QUIESCE &&
+ scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
+ scsi_run_queue(sdev->request_queue);
+ mutex_unlock(&sdev->state_mutex);
}
EXPORT_SYMBOL(scsi_device_resume);
@@ -3005,6 +3011,7 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
struct request_queue *q = sdev->request_queue;
int err;
+ mutex_lock(&sdev->state_mutex);
err = scsi_internal_device_block_nowait(sdev);
if (err == 0) {
if (q->mq_ops)
@@ -3012,6 +3019,8 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
else
scsi_wait_for_queuecommand(sdev);
}
+ mutex_unlock(&sdev->state_mutex);
+
return err;
}
@@ -3082,7 +3091,13 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);
static int scsi_internal_device_unblock(struct scsi_device *sdev,
enum scsi_device_state new_state)
{
- return scsi_internal_device_unblock_nowait(sdev, new_state);
+ int ret;
+
+ mutex_lock(&sdev->state_mutex);
+ ret = scsi_internal_device_unblock_nowait(sdev, new_state);
+ mutex_unlock(&sdev->state_mutex);
+
+ return ret;
}
static void
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6f7128f49c30..e6de4eee97a3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -231,6 +231,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
sdev->id = starget->id;
sdev->lun = lun;
sdev->channel = starget->channel;
+ mutex_init(&sdev->state_mutex);
sdev->sdev_state = SDEV_CREATED;
INIT_LIST_HEAD(&sdev->siblings);
INIT_LIST_HEAD(&sdev->same_target_siblings);
@@ -943,16 +944,17 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
/* set the device running here so that slave configure
* may do I/O */
+ mutex_lock(&sdev->state_mutex);
ret = scsi_device_set_state(sdev, SDEV_RUNNING);
- if (ret) {
+ if (ret)
ret = scsi_device_set_state(sdev, SDEV_BLOCK);
+ mutex_unlock(&sdev->state_mutex);
- if (ret) {
- sdev_printk(KERN_ERR, sdev,
- "in wrong state %s to complete scan\n",
- scsi_device_state_name(sdev->sdev_state));
- return SCSI_SCAN_NO_RESPONSE;
- }
+ if (ret) {
+ sdev_printk(KERN_ERR, sdev,
+ "in wrong state %s to complete scan\n",
+ scsi_device_state_name(sdev->sdev_state));
+ return SCSI_SCAN_NO_RESPONSE;
}
if (*bflags & BLIST_MS_192_BYTES_FOR_3F)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 82dfe07b1d47..a91537a3abbf 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -719,7 +719,7 @@ static ssize_t
store_state_field(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- int i;
+ int i, ret;
struct scsi_device *sdev = to_scsi_device(dev);
enum scsi_device_state state = 0;
@@ -734,9 +734,11 @@ store_state_field(struct device *dev, struct device_attribute *attr,
if (!state)
return -EINVAL;
- if (scsi_device_set_state(sdev, state))
- return -EINVAL;
- return count;
+ mutex_lock(&sdev->state_mutex);
+ ret = scsi_device_set_state(sdev, state);
+ mutex_unlock(&sdev->state_mutex);
+
+ return ret == 0 ? count : -EINVAL;
}
static ssize_t
@@ -1272,6 +1274,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
void __scsi_remove_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
+ int res;
/*
* This cleanup path is not reentrant and while it is impossible
@@ -1282,7 +1285,15 @@ void __scsi_remove_device(struct scsi_device *sdev)
return;
if (sdev->is_visible) {
- if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
+ /*
+ * If scsi_internal_device_block() is running concurrently,
+ * wait until it has finished before changing the device state.
+ */
+ mutex_lock(&sdev->state_mutex);
+ res = scsi_device_set_state(sdev, SDEV_CANCEL);
+ mutex_unlock(&sdev->state_mutex);
+
+ if (res != 0)
return;
bsg_unregister_queue(sdev->request_queue);
@@ -1298,7 +1309,10 @@ void __scsi_remove_device(struct scsi_device *sdev)
* scsi_run_queue() invocations have finished before tearing down the
* device.
*/
+ mutex_lock(&sdev->state_mutex);
scsi_device_set_state(sdev, SDEV_DEL);
+ mutex_unlock(&sdev->state_mutex);
+
blk_cleanup_queue(sdev->request_queue);
cancel_work_sync(&sdev->requeue_work);
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 3c5d89852e9f..f617021c94f7 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -554,11 +554,12 @@ int srp_reconnect_rport(struct srp_rport *rport)
* invoking scsi_target_unblock() won't change the state of
* these devices into running so do that explicitly.
*/
- spin_lock_irq(shost->host_lock);
- __shost_for_each_device(sdev, shost)
+ shost_for_each_device(sdev, shost) {
+ mutex_lock(&sdev->state_mutex);
if (sdev->sdev_state == SDEV_OFFLINE)
sdev->sdev_state = SDEV_RUNNING;
- spin_unlock_irq(shost->host_lock);
+ mutex_unlock(&sdev->state_mutex);
+ }
} else if (rport->state == SRP_RPORT_RUNNING) {
/*
* srp_reconnect_rport() has been invoked with fast_io_fail
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 35ad5e8a31ab..3ff6572f3cab 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1700,8 +1700,9 @@ static const struct block_device_operations sd_fops = {
static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
{
struct scsi_disk *sdkp = scsi_disk(scmd->request->rq_disk);
+ struct scsi_device *sdev = scmd->device;
- if (!scsi_device_online(scmd->device) ||
+ if (!scsi_device_online(sdev) ||
!scsi_medium_access_command(scmd) ||
host_byte(scmd->result) != DID_TIME_OUT ||
eh_disp != SUCCESS)
@@ -1724,7 +1725,9 @@ static int sd_eh_action(struct scsi_cmnd *scmd, int eh_disp)
if (sdkp->medium_access_timed_out >= sdkp->max_medium_access_timeouts) {
scmd_printk(KERN_ERR, scmd,
"Medium access timeout failure. Offlining disk!\n");
- scsi_device_set_state(scmd->device, SDEV_OFFLINE);
+ mutex_lock(&sdev->state_mutex);
+ scsi_device_set_state(sdev, SDEV_OFFLINE);
+ mutex_unlock(&sdev->state_mutex);
return FAILED;
}
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index bb784045ba71..d95735d842ba 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -207,6 +207,7 @@ struct scsi_device {
void *handler_data;
unsigned char access_state;
+ struct mutex state_mutex;
enum scsi_device_state sdev_state;
unsigned long sdev_data[0];
} __attribute__((aligned(sizeof(unsigned long))));
--
2.12.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v6 4/5] Introduce scsi_start_queue()
2017-05-02 18:00 [PATCH v6 0/5] Avoid that __scsi_remove_device() hangs Bart Van Assche
` (2 preceding siblings ...)
2017-05-02 18:00 ` [PATCH v6 3/5] Protect SCSI device state changes with a mutex Bart Van Assche
@ 2017-05-02 18:00 ` Bart Van Assche
2017-05-02 18:00 ` [PATCH v6 5/5] Make __scsi_remove_device go straight from BLOCKED to DEL Bart Van Assche
4 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2017-05-02 18:00 UTC (permalink / raw)
To: Martin K . Petersen, James Bottomley
Cc: linux-scsi, Bart Van Assche, Israel Rukshin, Max Gurtovoy,
Hannes Reinecke, Benjamin Block
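Move the code that restarts the request queue out of
scsi_internal_device_unblock_nowait() into a new function,
scsi_start_queue(), such that it can be called from other source
files. This patch does not change any functionality.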
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Benjamin Block <bblock@linux.vnet.ibm.com>
---
drivers/scsi/scsi_lib.c | 25 +++++++++++++++----------
drivers/scsi/scsi_priv.h | 1 +
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d2854558437d..bbce1f1db515 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3024,6 +3024,20 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
return err;
}
+void scsi_start_queue(struct scsi_device *sdev)
+{
+ struct request_queue *q = sdev->request_queue;
+ unsigned long flags;
+
+ if (q->mq_ops) {
+ blk_mq_start_stopped_hw_queues(q, false);
+ } else {
+ spin_lock_irqsave(q->queue_lock, flags);
+ blk_start_queue(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+}
+
/**
* scsi_internal_device_unblock_nowait - resume a device after a block request
* @sdev: device to resume
@@ -3042,9 +3056,6 @@ static int scsi_internal_device_block(struct scsi_device *sdev)
int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
enum scsi_device_state new_state)
{
- struct request_queue *q = sdev->request_queue;
- unsigned long flags;
-
/*
* Try to transition the scsi device to SDEV_RUNNING or one of the
* offlined states and goose the device queue if successful.
@@ -3062,13 +3073,7 @@ int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
sdev->sdev_state != SDEV_OFFLINE)
return -EINVAL;
- if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
- } else {
- spin_lock_irqsave(q->queue_lock, flags);
- blk_start_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
+ scsi_start_queue(sdev);
return 0;
}
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index f11bd102d6d5..c7629e31a75b 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -89,6 +89,7 @@ extern void scsi_run_host_queues(struct Scsi_Host *shost);
extern void scsi_requeue_run_queue(struct work_struct *work);
extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev);
+extern void scsi_start_queue(struct scsi_device *sdev);
extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
extern void scsi_mq_destroy_tags(struct Scsi_Host *shost);
extern int scsi_init_queue(void);
--
2.12.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v6 5/5] Make __scsi_remove_device go straight from BLOCKED to DEL
2017-05-02 18:00 [PATCH v6 0/5] Avoid that __scsi_remove_device() hangs Bart Van Assche
` (3 preceding siblings ...)
2017-05-02 18:00 ` [PATCH v6 4/5] Introduce scsi_start_queue() Bart Van Assche
@ 2017-05-02 18:00 ` Bart Van Assche
4 siblings, 0 replies; 6+ messages in thread
From: Bart Van Assche @ 2017-05-02 18:00 UTC (permalink / raw)
To: Martin K . Petersen, James Bottomley
Cc: linux-scsi, Bart Van Assche, Israel Rukshin, Max Gurtovoy,
Hannes Reinecke, Benjamin Block
If a device is blocked, make __scsi_remove_device() cause it to
transition to the DEL state. This means that all the commands
issued in .shutdown() will error in the mid-layer, thus making
the removal proceed without being stopped.
This patch is a slightly modified version of a patch from James
Bottomley. This patch prevents the following lockup:
Call Trace:
schedule+0x35/0x80
schedule_timeout+0x237/0x2d0
io_schedule_timeout+0xa6/0x110
wait_for_completion_io+0xa3/0x110
blk_execute_rq+0xdf/0x120
scsi_execute+0xce/0x150 [scsi_mod]
scsi_execute_req_flags+0x8f/0xf0 [scsi_mod]
sd_sync_cache+0xa9/0x190 [sd_mod]
sd_shutdown+0x6a/0x100 [sd_mod]
sd_remove+0x64/0xc0 [sd_mod]
__device_release_driver+0x8d/0x120
device_release_driver+0x1e/0x30
bus_remove_device+0xf9/0x170
device_del+0x127/0x240
__scsi_remove_device+0xc1/0xd0 [scsi_mod]
scsi_forget_host+0x57/0x60 [scsi_mod]
scsi_remove_host+0x72/0x110 [scsi_mod]
srp_remove_work+0x8b/0x200 [ib_srp]
Reported-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Benjamin Block <bblock@linux.vnet.ibm.com>
---
drivers/scsi/scsi_lib.c | 2 +-
drivers/scsi/scsi_sysfs.c | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bbce1f1db515..b83dca6b495b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2618,7 +2618,6 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
case SDEV_QUIESCE:
case SDEV_OFFLINE:
case SDEV_TRANSPORT_OFFLINE:
- case SDEV_BLOCK:
break;
default:
goto illegal;
@@ -2632,6 +2631,7 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
case SDEV_OFFLINE:
case SDEV_TRANSPORT_OFFLINE:
case SDEV_CANCEL:
+ case SDEV_BLOCK:
case SDEV_CREATED_BLOCK:
break;
default:
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index a91537a3abbf..1f243ac16010 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1290,7 +1290,20 @@ void __scsi_remove_device(struct scsi_device *sdev)
* wait until it has finished before changing the device state.
*/
mutex_lock(&sdev->state_mutex);
+ /*
+ * If blocked, we go straight to DEL and restart the queue so
+ * any commands issued during driver shutdown (like sync
+ * cache) are errored immediately.
+ */
res = scsi_device_set_state(sdev, SDEV_CANCEL);
+ if (res != 0) {
+ res = scsi_device_set_state(sdev, SDEV_DEL);
+ if (res == 0) {
+ scsi_start_queue(sdev);
+ sdev_printk(KERN_DEBUG, sdev,
+ "Changed state from BLOCKED to DEL\n");
+ }
+ }
mutex_unlock(&sdev->state_mutex);
if (res != 0)
--
2.12.2
^ permalink raw reply related [flat|nested] 6+ messages in thread