* [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
@ 2017-04-15 13:43 Cathy Avery
2017-04-15 14:06 ` Christoph Hellwig
0 siblings, 1 reply; 6+ messages in thread
From: Cathy Avery @ 2017-04-15 13:43 UTC (permalink / raw)
To: kys, jejb, martin.petersen
Cc: sthemmin, haiyangz, devel, linux-kernel, linux-scsi
When running multipath on a VM if all available paths go down
the driver can schedule large amounts of storvsc_remove_lun
work items to the same lun. In response to the failing paths
typically storvsc responds by taking host->scan_mutex and issuing
a TUR per lun. If there has been heavy IO to the failed device
all the failed IOs are returned from the host. A remove lun work
item is issued per failed IO. If the outstanding TURs have not been
completed in a timely manner the scan_mutex is never released or
released too late. Consequently the many remove lun work items are
not completed as scsi_remove_device also tries to take host->scan_mutex.
This results in dragging the VM down, sometimes completely.
This patch only allows one remove lun to be issued to a particular
lun while it is an instantiated member of the scsi stack.
Signed-off-by: Cathy Avery <cavery@redhat.com>
---
drivers/scsi/storvsc_drv.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 016639d..9dbb5bf 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -478,6 +478,10 @@ struct storvsc_device {
u64 port_name;
};
+struct storvsc_dev_hostdata {
+ atomic_t req_remove_lun;
+};
+
struct hv_host_device {
struct hv_device *dev;
unsigned int port;
@@ -918,6 +922,8 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
u8 asc, u8 ascq)
{
struct storvsc_scan_work *wrk;
+ struct storvsc_dev_hostdata *hostdata;
+ struct scsi_device *sdev;
void (*process_err_fn)(struct work_struct *work);
bool do_work = false;
@@ -953,8 +959,17 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
}
break;
case SRB_STATUS_INVALID_LUN:
- do_work = true;
- process_err_fn = storvsc_remove_lun;
+ sdev = scsi_device_lookup(host, 0, vm_srb->target_id,
+ vm_srb->lun);
+ if (sdev) {
+ hostdata = sdev->hostdata;
+ if (hostdata &&
+ !atomic_cmpxchg(&hostdata->req_remove_lun, 0, 1)) {
+ do_work = true;
+ process_err_fn = storvsc_remove_lun;
+ }
+ scsi_device_put(sdev);
+ }
break;
case SRB_STATUS_ABORTED:
if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
@@ -1426,9 +1441,22 @@ static int storvsc_device_configure(struct scsi_device *sdevice)
sdevice->no_write_same = 0;
}
+ sdevice->hostdata = kzalloc(sizeof(struct storvsc_dev_hostdata),
+ GFP_ATOMIC);
+ if (!sdevice->hostdata)
+ return -ENOMEM;
+
return 0;
}
+static void storvsc_device_destroy(struct scsi_device *sdevice)
+{
+ if (sdevice->hostdata) {
+ kfree(sdevice->hostdata);
+ sdevice->hostdata = NULL;
+ }
+}
+
static int storvsc_get_chs(struct scsi_device *sdev, struct block_device * bdev,
sector_t capacity, int *info)
{
@@ -1669,6 +1697,7 @@ static struct scsi_host_template scsi_driver = {
.eh_timed_out = storvsc_eh_timed_out,
.slave_alloc = storvsc_device_alloc,
.slave_configure = storvsc_device_configure,
+ .slave_destroy = storvsc_device_destroy,
.cmd_per_lun = 255,
.this_id = -1,
.use_clustering = ENABLE_CLUSTERING,
--
2.5.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
2017-04-15 13:43 [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun Cathy Avery
@ 2017-04-15 14:06 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2017-04-15 14:06 UTC (permalink / raw)
To: Cathy Avery
Cc: kys, jejb, martin.petersen, sthemmin, haiyangz, devel,
linux-kernel, linux-scsi
Just add a singlethreaded workqueue for storvsc_handle_error and you'll
get serialization for all error handling for free.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
@ 2017-04-15 14:06 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2017-04-15 14:06 UTC (permalink / raw)
To: Cathy Avery
Cc: sthemmin, linux-scsi, martin.petersen, haiyangz, linux-kernel,
devel, jejb
Just add a singlethreaded workqueue for storvsc_handle_error and you'll
get serialization for all error handling for free.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
2017-04-15 14:06 ` Christoph Hellwig
@ 2017-04-17 19:44 ` Cathy Avery
-1 siblings, 0 replies; 6+ messages in thread
From: Cathy Avery @ 2017-04-17 19:44 UTC (permalink / raw)
To: Christoph Hellwig
Cc: kys, jejb, martin.petersen, sthemmin, haiyangz, devel,
linux-kernel, linux-scsi
On 04/15/2017 10:06 AM, Christoph Hellwig wrote:
> Just add a singlethreaded workqueue for storvsc_handle_error and you'll
> get serialization for all error handling for free.
The problem I am seeing is that many work items can be queued up for the
same lun before it goes away. The single threaded queue would have to allow
for only a queue of one and no more. Either that or each work item for a
particular lun must have the same memory address so it gets
rejected if you try to queue a remove to the same lun twice.
Maybe I am not understanding your suggestion correctly.
Thanks,
Cathy
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
@ 2017-04-17 19:44 ` Cathy Avery
0 siblings, 0 replies; 6+ messages in thread
From: Cathy Avery @ 2017-04-17 19:44 UTC (permalink / raw)
To: Christoph Hellwig
Cc: sthemmin, linux-scsi, martin.petersen, haiyangz, linux-kernel,
devel, jejb
On 04/15/2017 10:06 AM, Christoph Hellwig wrote:
> Just add a singlethreaded workqueue for storvsc_handle_error and you'll
> get serialization for all error handling for free.
The problem I am seeing is that many work items can be queued up for the
same lun before it goes away. The single threaded queue would have to allow
for only a queue of one and no more. Either that or each work item for a
particular lun must have the same memory address so it gets
rejected if you try to queue a remove to the same lun twice.
Maybe I am not understanding your suggestion correctly.
Thanks,
Cathy
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun
@ 2017-10-17 17:32 Cathy Avery
0 siblings, 0 replies; 6+ messages in thread
From: Cathy Avery @ 2017-10-17 17:32 UTC (permalink / raw)
To: kys, hch, haiyangz, jejb, martin.petersen, dan.carpenter
Cc: devel, linux-kernel, linux-scsi
When running multipath on a VM if all available paths go down
the driver can schedule large amounts of storvsc_remove_lun
work items to the same lun. In response to the failing paths
typically storvsc responds by taking host->scan_mutex and issuing
a TUR per lun. If there has been heavy IO to the failed device
all the failed IOs are returned from the host. A remove lun work
item is issued per failed IO. If the outstanding TURs have not been
completed in a timely manner the scan_mutex is never released or
released too late. Consequently the many remove lun work items are
not completed as scsi_remove_device also tries to take host->scan_mutex.
This results in dragging the VM down, sometimes completely.
This patch only allows one remove lun to be issued to a particular
lun while it is an instantiated member of the scsi stack.
Changes since v1:
Use single threaded workqueue to serialize work in
storvsc_handle_error [Christoph Hellwig]
Signed-off-by: Cathy Avery <cavery@redhat.com>
---
drivers/scsi/storvsc_drv.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 5e7200f..6febcdb 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -486,6 +486,8 @@ struct hv_host_device {
unsigned int port;
unsigned char path;
unsigned char target;
+ struct workqueue_struct *handle_error_wq;
+ char work_q_name[20];
};
struct storvsc_scan_work {
@@ -922,6 +924,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
{
struct storvsc_scan_work *wrk;
void (*process_err_fn)(struct work_struct *work);
+ struct hv_host_device *host_dev = shost_priv(host);
bool do_work = false;
switch (SRB_STATUS(vm_srb->srb_status)) {
@@ -988,7 +991,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
wrk->lun = vm_srb->lun;
wrk->tgt_id = vm_srb->target_id;
INIT_WORK(&wrk->work, process_err_fn);
- schedule_work(&wrk->work);
+ queue_work(host_dev->handle_error_wq, &wrk->work);
}
@@ -1803,10 +1806,19 @@ static int storvsc_probe(struct hv_device *device,
if (stor_device->num_sc != 0)
host->nr_hw_queues = stor_device->num_sc + 1;
+ /*
+ * Set the error handler work queue.
+ */
+ snprintf(host_dev->work_q_name, sizeof(host_dev->work_q_name),
+ "storvsc_error_wq_%d", host->host_no);
+ host_dev->handle_error_wq =
+ create_singlethread_workqueue(host_dev->work_q_name);
+ if (!host_dev->handle_error_wq)
+ goto err_out2;
/* Register the HBA and start the scsi bus scan */
ret = scsi_add_host(host, &device->device);
if (ret != 0)
- goto err_out2;
+ goto err_out3;
if (!dev_is_ide) {
scsi_scan_host(host);
@@ -1815,7 +1827,7 @@ static int storvsc_probe(struct hv_device *device,
device->dev_instance.b[4]);
ret = scsi_add_device(host, 0, target, 0);
if (ret)
- goto err_out3;
+ goto err_out4;
}
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
if (host->transportt == fc_transport_template) {
@@ -1827,14 +1839,17 @@ static int storvsc_probe(struct hv_device *device,
fc_host_port_name(host) = stor_device->port_name;
stor_device->rport = fc_remote_port_add(host, 0, &ids);
if (!stor_device->rport)
- goto err_out3;
+ goto err_out4;
}
#endif
return 0;
-err_out3:
+err_out4:
scsi_remove_host(host);
+err_out3:
+ destroy_workqueue(host_dev->handle_error_wq);
+
err_out2:
/*
* Once we have connected with the host, we would need to
@@ -1858,6 +1873,7 @@ static int storvsc_remove(struct hv_device *dev)
{
struct storvsc_device *stor_device = hv_get_drvdata(dev);
struct Scsi_Host *host = stor_device->host;
+ struct hv_host_device *host_dev = shost_priv(host);
#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
if (host->transportt == fc_transport_template) {
@@ -1865,6 +1881,7 @@ static int storvsc_remove(struct hv_device *dev)
fc_remove_host(host);
}
#endif
+ destroy_workqueue(host_dev->handle_error_wq);
scsi_remove_host(host);
storvsc_dev_remove(dev);
scsi_host_put(host);
--
2.5.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-10-17 17:32 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-15 13:43 [PATCH] scsi: storvsc: Allow only one remove lun work item to be issued per lun Cathy Avery
2017-04-15 14:06 ` Christoph Hellwig
2017-04-15 14:06 ` Christoph Hellwig
2017-04-17 19:44 ` Cathy Avery
2017-04-17 19:44 ` Cathy Avery
2017-10-17 17:32 Cathy Avery
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.