All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs
@ 2017-04-10 17:53 Bart Van Assche
  2017-04-10 17:53 ` [PATCH v2 1/4] Introduce scsi_start_queue() Bart Van Assche
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Bart Van Assche @ 2017-04-10 17:53 UTC (permalink / raw)
  To: Martin K . Petersen, James Bottomley; +Cc: linux-scsi, Bart Van Assche

Several weeks ago Israel Rukshin reported that __scsi_remove_device()
hangs if it is waiting for the SYNCHRONIZE CACHE command submitted by
the sd driver to finish if the block layer queue is stopped and does
not get restarted. This patch series avoids that that hang occurs.

Bart Van Assche (4):
  Introduce scsi_start_queue()
  Introduce scsi_execute_async()
  sd: Make synchronize cache upon shutdown asynchronous
  Avoid that __scsi_remove_device() hangs

 drivers/scsi/scsi_lib.c    | 114 ++++++++++++++++++++++++++++++++++-----------
 drivers/scsi/scsi_priv.h   |   1 +
 drivers/scsi/scsi_sysfs.c  |   9 ++++
 drivers/scsi/sd.c          |  25 +++++++++-
 include/scsi/scsi_device.h |   4 ++
 5 files changed, 124 insertions(+), 29 deletions(-)

-- 
2.12.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/4] Introduce scsi_start_queue()
  2017-04-10 17:53 [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
@ 2017-04-10 17:53 ` Bart Van Assche
  2017-04-10 17:54 ` [PATCH v2 2/4] Introduce scsi_execute_async() Bart Van Assche
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Bart Van Assche @ 2017-04-10 17:53 UTC (permalink / raw)
  To: Martin K . Petersen, James Bottomley
  Cc: linux-scsi, Bart Van Assche, Israel Rukshin, Max Gurtovoy,
	Hannes Reinecke

This patch does not change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c  | 25 +++++++++++++++----------
 drivers/scsi/scsi_priv.h |  1 +
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f41e6b84a1bd..4893f2908edf 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2987,6 +2987,20 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
 }
 EXPORT_SYMBOL_GPL(scsi_internal_device_block);
  
+void scsi_start_queue(struct scsi_device *sdev)
+{
+	struct request_queue *q = sdev->request_queue;
+	unsigned long flags;
+
+	if (q->mq_ops) {
+		blk_mq_start_stopped_hw_queues(q, false);
+	} else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_start_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
+}
+
 /**
  * scsi_internal_device_unblock - resume a device after a block request
  * @sdev:	device to resume
@@ -3007,9 +3021,6 @@ int
 scsi_internal_device_unblock(struct scsi_device *sdev,
 			     enum scsi_device_state new_state)
 {
-	struct request_queue *q = sdev->request_queue; 
-	unsigned long flags;
-
 	/*
 	 * Try to transition the scsi device to SDEV_RUNNING or one of the
 	 * offlined states and goose the device queue if successful.
@@ -3027,13 +3038,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
 		 sdev->sdev_state != SDEV_OFFLINE)
 		return -EINVAL;
 
-	if (q->mq_ops) {
-		blk_mq_start_stopped_hw_queues(q, false);
-	} else {
-		spin_lock_irqsave(q->queue_lock, flags);
-		blk_start_queue(q);
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
+	scsi_start_queue(sdev);
 
 	return 0;
 }
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index f11bd102d6d5..c7629e31a75b 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -89,6 +89,7 @@ extern void scsi_run_host_queues(struct Scsi_Host *shost);
 extern void scsi_requeue_run_queue(struct work_struct *work);
 extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
 extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev);
+extern void scsi_start_queue(struct scsi_device *sdev);
 extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
 extern void scsi_mq_destroy_tags(struct Scsi_Host *shost);
 extern int scsi_init_queue(void);
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/4] Introduce scsi_execute_async()
  2017-04-10 17:53 [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
  2017-04-10 17:53 ` [PATCH v2 1/4] Introduce scsi_start_queue() Bart Van Assche
@ 2017-04-10 17:54 ` Bart Van Assche
  2017-04-10 17:54 ` [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous Bart Van Assche
  2017-04-10 17:54 ` [PATCH v2 4/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
  3 siblings, 0 replies; 7+ messages in thread
From: Bart Van Assche @ 2017-04-10 17:54 UTC (permalink / raw)
  To: Martin K . Petersen, James Bottomley
  Cc: linux-scsi, Bart Van Assche, Israel Rukshin, Max Gurtovoy,
	Hannes Reinecke

Move the code for submitting a SCSI command from scsi_execute()
into scsi_build_rq(). Introduce scsi_execute_async(). This patch
does not change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c    | 89 +++++++++++++++++++++++++++++++++++++---------
 include/scsi/scsi_device.h |  4 +++
 2 files changed, 76 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4893f2908edf..b68699cba6a2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -213,6 +213,73 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 	__scsi_queue_insert(cmd, reason, 1);
 }
 
+static struct request *scsi_build_rq(const struct scsi_device *sdev,
+		const unsigned char *cmd, int data_direction, void *buffer,
+		unsigned bufflen, int timeout, int retries, u64 flags,
+		req_flags_t rq_flags)
+{
+	struct request *req;
+	struct scsi_request *rq;
+	int ret;
+
+	req = blk_get_request(sdev->request_queue,
+			data_direction == DMA_TO_DEVICE ?
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+	if (IS_ERR(req))
+		return req;
+	rq = scsi_req(req);
+	scsi_req_init(req);
+
+	if (bufflen) {
+		ret = blk_rq_map_kern(sdev->request_queue, req,
+				      buffer, bufflen, __GFP_RECLAIM);
+		if (ret) {
+			blk_put_request(req);
+			return ERR_PTR(ret);
+		}
+	}
+
+	rq->cmd_len = COMMAND_SIZE(cmd[0]);
+	memcpy(rq->cmd, cmd, rq->cmd_len);
+	req->retries = retries;
+	req->timeout = timeout;
+	req->cmd_flags |= flags;
+	req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
+
+	return req;
+}
+
+/**
+ * scsi_execute_async - insert a SCSI request
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @data_direction: data direction
+ * @buffer:	data buffer
+ * @bufflen:	length of buffer
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ * @flags:	flags for ->cmd_flags
+ * @rq_flags:	flags for ->rq_flags
+ */
+int scsi_execute_async(const struct scsi_device *sdev,
+		const unsigned char *cmd, int data_direction, void *buffer,
+		unsigned bufflen, int timeout, int retries, u64 flags,
+		req_flags_t rq_flags)
+{
+	struct request *req;
+
+	req = scsi_build_rq(sdev, cmd, data_direction, buffer, bufflen, timeout,
+			    retries, flags, rq_flags);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+	/*
+	 * head injection *required* here otherwise quiesce won't work
+	 */
+	blk_execute_rq_nowait(req->q, NULL, req, 1, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL(scsi_execute_async);
 
 /**
  * scsi_execute - insert request and wait for the result
@@ -242,24 +309,12 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	struct scsi_request *rq;
 	int ret = DRIVER_ERROR << 24;
 
-	req = blk_get_request(sdev->request_queue,
-			data_direction == DMA_TO_DEVICE ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+	req = scsi_build_rq(sdev, cmd, data_direction, buffer, bufflen,
+			    timeout, retries, flags, rq_flags);
 	if (IS_ERR(req))
-		return ret;
-	rq = scsi_req(req);
-	scsi_req_init(req);
+		return PTR_ERR(req);
 
-	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
-					buffer, bufflen, __GFP_RECLAIM))
-		goto out;
-
-	rq->cmd_len = COMMAND_SIZE(cmd[0]);
-	memcpy(rq->cmd, cmd, rq->cmd_len);
-	req->retries = retries;
-	req->timeout = timeout;
-	req->cmd_flags |= flags;
-	req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT;
+	rq = scsi_req(req);
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
@@ -282,7 +337,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	if (sshdr)
 		scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
 	ret = req->errors;
- out:
+
 	blk_put_request(req);
 
 	return ret;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 080c7ce9bae8..cdff28519393 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -408,6 +408,10 @@ extern const char *scsi_device_state_name(enum scsi_device_state);
 extern int scsi_is_sdev_device(const struct device *);
 extern int scsi_is_target_device(const struct device *);
 extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
+extern int scsi_execute_async(const struct scsi_device *sdev,
+		const unsigned char *cmd, int data_direction, void *buffer,
+		unsigned bufflen, int timeout, int retries, u64 flags,
+		req_flags_t rq_flags);
 extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 			int data_direction, void *buffer, unsigned bufflen,
 			unsigned char *sense, struct scsi_sense_hdr *sshdr,
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous
  2017-04-10 17:53 [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
  2017-04-10 17:53 ` [PATCH v2 1/4] Introduce scsi_start_queue() Bart Van Assche
  2017-04-10 17:54 ` [PATCH v2 2/4] Introduce scsi_execute_async() Bart Van Assche
@ 2017-04-10 17:54 ` Bart Van Assche
  2017-04-12 14:41   ` Benjamin Block
  2017-04-10 17:54 ` [PATCH v2 4/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
  3 siblings, 1 reply; 7+ messages in thread
From: Bart Van Assche @ 2017-04-10 17:54 UTC (permalink / raw)
  To: Martin K . Petersen, James Bottomley
  Cc: linux-scsi, Bart Van Assche, Israel Rukshin, Max Gurtovoy,
	Hannes Reinecke

This patch avoids that sd_shutdown() hangs on the SYNCHRONIZE CACHE
command if the block layer queue has been stopped by
scsi_target_block().

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/sd.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fe0f7997074e..8e98b7684893 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1489,6 +1489,22 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 	return retval;
 }
 
+/*
+ * Issue a SYNCHRONIZE CACHE command asynchronously. Since blk_cleanup_queue()
+ * waits for all commands to finish, __scsi_remove_device() will wait for the
+ * SYNCHRONIZE CACHE command to finish.
+ */
+static int sd_sync_cache_async(struct scsi_disk *sdkp)
+{
+	const struct scsi_device *sdp = sdkp->device;
+	const int timeout = sdp->request_queue->rq_timeout *
+			    SD_FLUSH_TIMEOUT_MULTIPLIER;
+	const unsigned char cmd[10] = { SYNCHRONIZE_CACHE };
+
+	return scsi_execute_async(sdp, cmd, DMA_NONE, NULL, 0, timeout,
+				  SD_MAX_RETRIES, 0, 0);
+}
+
 static int sd_sync_cache(struct scsi_disk *sdkp)
 {
 	int retries, res;
@@ -3356,6 +3372,8 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 static void sd_shutdown(struct device *dev)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	const bool stop_disk = system_state != SYSTEM_RESTART &&
+		sdkp->device->manage_start_stop;
 
 	if (!sdkp)
 		return;         /* this can happen */
@@ -3365,10 +3383,13 @@ static void sd_shutdown(struct device *dev)
 
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		if (stop_disk)
+			sd_sync_cache(sdkp);
+		else
+			sd_sync_cache_async(sdkp);
 	}
 
-	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
+	if (stop_disk) {
 		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
 		sd_start_stop_device(sdkp, 0);
 	}
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 4/4] Avoid that __scsi_remove_device() hangs
  2017-04-10 17:53 [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
                   ` (2 preceding siblings ...)
  2017-04-10 17:54 ` [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous Bart Van Assche
@ 2017-04-10 17:54 ` Bart Van Assche
  3 siblings, 0 replies; 7+ messages in thread
From: Bart Van Assche @ 2017-04-10 17:54 UTC (permalink / raw)
  To: Martin K . Petersen, James Bottomley
  Cc: linux-scsi, Bart Van Assche, Max Gurtovoy, Hannes Reinecke

Since scsi_target_unblock() uses starget_for_each_device(), since
starget_for_each_device() uses scsi_device_get(), since
scsi_device_get() fails after unloading of the LLD kernel module
has been started scsi_target_unblock() may skip devices that were
affected by scsi_target_block(). Ensure that __scsi_remove_device()
does not hang for blocked SCSI devices. This patch avoids that
unloading the ib_srp kernel module can trigger the following hang:

Call Trace:
 schedule+0x35/0x80
 schedule_timeout+0x237/0x2d0
 io_schedule_timeout+0xa6/0x110
 wait_for_completion_io+0xa3/0x110
 blk_execute_rq+0xdf/0x120
 scsi_execute+0xce/0x150 [scsi_mod]
 scsi_execute_req_flags+0x8f/0xf0 [scsi_mod]
 sd_sync_cache+0xa9/0x190 [sd_mod]
 sd_shutdown+0x6a/0x100 [sd_mod]
 sd_remove+0x64/0xc0 [sd_mod]
 __device_release_driver+0x8d/0x120
 device_release_driver+0x1e/0x30
 bus_remove_device+0xf9/0x170
 device_del+0x127/0x240
 __scsi_remove_device+0xc1/0xd0 [scsi_mod]
 scsi_forget_host+0x57/0x60 [scsi_mod]
 scsi_remove_host+0x72/0x110 [scsi_mod]
 srp_remove_work+0x8b/0x200 [ib_srp]

Reported-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_sysfs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 82dfe07b1d47..e090c35ba6ee 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1299,6 +1299,15 @@ void __scsi_remove_device(struct scsi_device *sdev)
 	 * device.
 	 */
 	scsi_device_set_state(sdev, SDEV_DEL);
+	/*
+	 * Since scsi_target_unblock() is a no-op after unloading of the SCSI
+	 * LLD has started, explicitly restart the queue. Do this after the
+	 * device state has been changed into SDEV_DEL because
+	 * scsi_prep_state_check() returns BLKPREP_KILL for the SDEV_DEL state
+	 * Do this before calling blk_cleanup_queue() to avoid that that
+	 * function encounters a stopped queue.
+	 */
+	scsi_start_queue(sdev);
 	blk_cleanup_queue(sdev->request_queue);
 	cancel_work_sync(&sdev->requeue_work);
 
-- 
2.12.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous
  2017-04-10 17:54 ` [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous Bart Van Assche
@ 2017-04-12 14:41   ` Benjamin Block
  2017-04-13  0:28     ` Bart Van Assche
  0 siblings, 1 reply; 7+ messages in thread
From: Benjamin Block @ 2017-04-12 14:41 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Martin K . Petersen, James Bottomley, linux-scsi, Israel Rukshin,
	Max Gurtovoy, Hannes Reinecke

On Mon, Apr 10, 2017 at 10:54:01AM -0700, Bart Van Assche wrote:
> This patch avoids that sd_shutdown() hangs on the SYNCHRONIZE CACHE
> command if the block layer queue has been stopped by
> scsi_target_block().
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Cc: Israel Rukshin <israelr@mellanox.com>
> Cc: Max Gurtovoy <maxg@mellanox.com>
> Cc: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/sd.c | 25 +++++++++++++++++++++++--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index fe0f7997074e..8e98b7684893 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1489,6 +1489,22 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
>  	return retval;
>  }
> 
> +/*
> + * Issue a SYNCHRONIZE CACHE command asynchronously. Since blk_cleanup_queue()
> + * waits for all commands to finish, __scsi_remove_device() will wait for the
> + * SYNCHRONIZE CACHE command to finish.
> + */
> +static int sd_sync_cache_async(struct scsi_disk *sdkp)
> +{
> +	const struct scsi_device *sdp = sdkp->device;
> +	const int timeout = sdp->request_queue->rq_timeout *
> +			    SD_FLUSH_TIMEOUT_MULTIPLIER;
> +	const unsigned char cmd[10] = { SYNCHRONIZE_CACHE };
> +
> +	return scsi_execute_async(sdp, cmd, DMA_NONE, NULL, 0, timeout,
> +				  SD_MAX_RETRIES, 0, 0);
> +}
> +

OK, so I take it the problem is when the queue is stopped, then the
completion in blk_execute_rq() will never be triggered and then we wait
for a timeout there, or potentially forever?

But then what is the point in trying to do it async here anyway? Won't
that just be doomed in the same way, just that we don't see the effect?

>  static int sd_sync_cache(struct scsi_disk *sdkp)
>  {
>  	int retries, res;
> @@ -3356,6 +3372,8 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
>  static void sd_shutdown(struct device *dev)
>  {
>  	struct scsi_disk *sdkp = dev_get_drvdata(dev);
> +	const bool stop_disk = system_state != SYSTEM_RESTART &&
> +		sdkp->device->manage_start_stop;
> 
>  	if (!sdkp)
>  		return;         /* this can happen */

That seems wrong then. You already dereference sdkp before the function
checks whether or not the pointer is valid (at least if you can trust
the comment there).

> @@ -3365,10 +3383,13 @@ static void sd_shutdown(struct device *dev)
> 
>  	if (sdkp->WCE && sdkp->media_present) {
>  		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
> -		sd_sync_cache(sdkp);
> +		if (stop_disk)
> +			sd_sync_cache(sdkp);
> +		else
> +			sd_sync_cache_async(sdkp);

That makes the function-documentation obsolete, doesn't it?


                                                    Beste Grüße / Best regards,
                                                      - Benjamin Block

>  	}
> 
> -	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
> +	if (stop_disk) {
>  		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
>  		sd_start_stop_device(sdkp, 0);
>  	}
> -- 
> 2.12.0
> 
-- 
Linux on z Systems Development         /         IBM Systems & Technology Group
		  IBM Deutschland Research & Development GmbH 
Vorsitz. AufsR.: Martina Koederitz     /        Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen / Registergericht: AmtsG Stuttgart, HRB 243294

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous
  2017-04-12 14:41   ` Benjamin Block
@ 2017-04-13  0:28     ` Bart Van Assche
  0 siblings, 0 replies; 7+ messages in thread
From: Bart Van Assche @ 2017-04-13  0:28 UTC (permalink / raw)
  To: bblock; +Cc: linux-scsi, James.Bottomley, israelr, hare, maxg, martin.petersen

On Wed, 2017-04-12 at 16:41 +0200, Benjamin Block wrote:
> On Mon, Apr 10, 2017 at 10:54:01AM -0700, Bart Van Assche wrote:
> > [ ... ]
> OK, so I take it the problem is when the queue is stopped, then the
> completion in blk_execute_rq() will never be triggered and then we wait
> for a timeout there, or potentially forever?

Hello Benjamin,

Thanks for the review.

If a request is queued after a queue has been stopped then that request
will never be started and hence even the timeout timer won't be started.
blk_cleanup_queue() hangs if invoked for a stopped queue and one or more
requests have not yet been started.

> But then what is the point in trying to do it async here anyway? Won't
> that just be doomed in the same way, just that we don't see the effect?

Have you noticed that patch 4/4 in this series restarts the queue just
before calling blk_cleanup_queue()?

Anyway, can you have a look at the patch below and see whether this new
version addresses all the concerns you had reported in your previous
e-mail?

Thanks,

Bart.


Subject: [PATCH] sd: Make synchronize cache upon shutdown asynchronous

This patch avoids that sd_shutdown() hangs on the SYNCHRONIZE CACHE
command if the block layer queue has been stopped by
scsi_target_block().
---
 drivers/scsi/sd.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fe0f7997074e..deff564fe649 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1489,6 +1489,33 @@ static unsigned int sd_check_events(struct gendisk *disk, unsigned int clearing)
 	return retval;
 }
 
+static void sd_sync_cache_done(struct request *rq, int e)
+{
+	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+
+	sd_printk(KERN_DEBUG, sdkp, "%s\n", __func__);
+
+	blk_put_request(rq);
+}
+
+/*
+ * Issue a SYNCHRONIZE CACHE command asynchronously. Since blk_cleanup_queue()
+ * waits for all commands to finish, __scsi_remove_device() will wait for the
+ * SYNCHRONIZE CACHE command to finish.
+ */
+static int sd_sync_cache_async(struct scsi_disk *sdkp)
+{
+	const struct scsi_device *sdp = sdkp->device;
+	const int timeout = sdp->request_queue->rq_timeout *
+			    SD_FLUSH_TIMEOUT_MULTIPLIER;
+	const unsigned char cmd[10] = { SYNCHRONIZE_CACHE };
+
+	sd_printk(KERN_DEBUG, sdkp, "%s\n", __func__);
+	return scsi_execute_async(sdp, sdkp->disk, cmd, DMA_NONE, NULL, 0,
+				  timeout, SD_MAX_RETRIES, 0, 0,
+				  sd_sync_cache_done);
+}
+
 static int sd_sync_cache(struct scsi_disk *sdkp)
 {
 	int retries, res;
@@ -3349,13 +3376,15 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
 }
 
 /*
- * Send a SYNCHRONIZE CACHE instruction down to the device through
- * the normal SCSI command structure.  Wait for the command to
- * complete.
+ * Send a SYNCHRONIZE CACHE instruction down to the device through the normal
+ * SCSI command structure. When stopping the disk, wait for the command to
+ * complete. When not stopping the disk, the blk_cleanup_queue() call in
+ * __scsi_remove_device() will wait for this command to complete.
  */
 static void sd_shutdown(struct device *dev)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+	bool stop_disk;
 
 	if (!sdkp)
 		return;         /* this can happen */
@@ -3363,12 +3392,18 @@ static void sd_shutdown(struct device *dev)
 	if (pm_runtime_suspended(dev))
 		return;
 
+	stop_disk = system_state != SYSTEM_RESTART &&
+		sdkp->device->manage_start_stop;
+
 	if (sdkp->WCE && sdkp->media_present) {
 		sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-		sd_sync_cache(sdkp);
+		if (stop_disk)
+			sd_sync_cache(sdkp);
+		else
+			sd_sync_cache_async(sdkp);
 	}
 
-	if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
+	if (stop_disk) {
 		sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
 		sd_start_stop_device(sdkp, 0);
 	}
-- 
2.12.2

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2017-04-13  0:28 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-10 17:53 [PATCH v2 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
2017-04-10 17:53 ` [PATCH v2 1/4] Introduce scsi_start_queue() Bart Van Assche
2017-04-10 17:54 ` [PATCH v2 2/4] Introduce scsi_execute_async() Bart Van Assche
2017-04-10 17:54 ` [PATCH v2 3/4] sd: Make synchronize cache upon shutdown asynchronous Bart Van Assche
2017-04-12 14:41   ` Benjamin Block
2017-04-13  0:28     ` Bart Van Assche
2017-04-10 17:54 ` [PATCH v2 4/4] Avoid that __scsi_remove_device() hangs Bart Van Assche

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.