* [PATCH 0/4] Namespace handling updates and fixes
@ 2015-05-18 19:30 Keith Busch
  2015-05-18 19:30 ` [PATCH 1/4] blk: Define blk_set_queue_dying in header Keith Busch
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Keith Busch @ 2015-05-18 19:30 UTC (permalink / raw)


This series makes it possible to take down namespaces dynamically and to
hot-remove mounted namespaces. It combines a few previous patches
submitted to the list and replaces them all, since they touch much of
the same code.

Keith Busch (4):
  blk: Define blk_set_queue_dying in header
  NVMe: Decouple nvmeq hctx from ns request queue
  NVMe: Automatic namespace rescan
  NVMe: Remove disks before shutdown

 drivers/block/nvme-core.c |  198 ++++++++++++++++++++++++++++++++-------------
 include/linux/blkdev.h    |    1 +
 include/linux/nvme.h      |    2 +
 include/uapi/linux/nvme.h |    4 +
 4 files changed, 148 insertions(+), 57 deletions(-)

-- 
1.7.10.4

* [PATCH 1/4] blk: Define blk_set_queue_dying in header
  2015-05-18 19:30 [PATCH 0/4] Namespace handling updates and fixes Keith Busch
@ 2015-05-18 19:30 ` Keith Busch
  2015-05-18 19:30 ` [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue Keith Busch
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2015-05-18 19:30 UTC (permalink / raw)


The 'blk_set_queue_dying()' function is an exported symbol, but it was
not declared in a header, so other components could not use it.
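
For context, the expected usage once the declaration is visible looks
something like this (untested sketch; example_ns_remove() is a made-up
name, the pattern mirrors nvme_ns_remove() in patch 3):

#include <linux/blkdev.h>

static void example_ns_remove(struct request_queue *q)
{
	blk_set_queue_dying(q);	/* fail new requests immediately */
	blk_cleanup_queue(q);	/* then drain and tear down the queue */
}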

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 include/linux/blkdev.h |    1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5d93a66..ed44ecd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -998,6 +998,7 @@ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
 extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 						      request_fn_proc *, spinlock_t *);
+extern void blk_set_queue_dying(struct request_queue *q);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-- 
1.7.10.4

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-18 19:30 [PATCH 0/4] Namespace handling updates and fixes Keith Busch
  2015-05-18 19:30 ` [PATCH 1/4] blk: Define blk_set_queue_dying in header Keith Busch
@ 2015-05-18 19:30 ` Keith Busch
  2015-05-19  7:46   ` Christoph Hellwig
  2015-05-18 19:30 ` [PATCH 3/4] NVMe: Automatic namespace rescan Keith Busch
  2015-05-18 19:30 ` [PATCH 4/4] NVMe: Remove disks before shutdown Keith Busch
  3 siblings, 1 reply; 10+ messages in thread
From: Keith Busch @ 2015-05-18 19:30 UTC (permalink / raw)


In preparation for namespaces dynamically attaching and detaching, we
can't tie an nvmeq's hctx to a namespace request queue: the namespace
may be removed at any time. This patch has the driver create an IO
request queue that is unaffiliated with any namespace, so we always
have a valid hctx for the IO queues.
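
To spell out the hazard, the pattern in miniature (simplified sketch,
not the driver code verbatim):

static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int hctx_idx)
{
	struct nvme_dev *dev = data;
	struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];

	/* blk-mq hands us an hctx owned by whichever request queue is
	 * being initialized, and the driver caches it.  If that queue
	 * belongs to a namespace, deleting the namespace frees the
	 * hctx and leaves nvmeq->hctx dangling.  A device-lifetime,
	 * namespace-unaffiliated queue keeps the cached hctx valid. */
	hctx->driver_data = nvmeq;
	nvmeq->hctx = hctx;
	return 0;
}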

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   26 ++++++++++++++++----------
 include/linux/nvme.h      |    1 +
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 85b8036..bd36d34 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -201,13 +201,6 @@ static int nvme_admin_init_request(void *data, struct request *req,
 	return 0;
 }
 
-static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
-	struct nvme_queue *nvmeq = hctx->driver_data;
-
-	nvmeq->hctx = NULL;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int hctx_idx)
 {
@@ -1550,7 +1543,6 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_admin_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_admin_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -1559,7 +1551,6 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -2337,6 +2328,17 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (blk_mq_alloc_tag_set(&dev->tagset))
 		return 0;
 
+	dev->io_q = blk_mq_init_queue(&dev->tagset);
+	if (IS_ERR(dev->io_q)) {
+		blk_mq_free_tag_set(&dev->tagset);
+		return 0;
+	}
+	if (!blk_get_queue(dev->io_q)) {
+		blk_cleanup_queue(dev->io_q);
+		blk_mq_free_tag_set(&dev->tagset);
+		return 0;
+	}
+
 	for (i = 1; i <= nn; i++)
 		nvme_alloc_ns(dev, i);
 
@@ -2727,7 +2729,11 @@ static void nvme_free_dev(struct kref *kref)
 	put_device(dev->device);
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
-	blk_mq_free_tag_set(&dev->tagset);
+	if (!IS_ERR_OR_NULL(dev->io_q)) {
+		blk_cleanup_queue(dev->io_q);
+		blk_mq_free_tag_set(&dev->tagset);
+		blk_put_queue(dev->io_q);
+	}
 	blk_put_queue(dev->admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8dbd05e..013c38f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -71,6 +71,7 @@ struct nvme_dev {
 	struct list_head node;
 	struct nvme_queue **queues;
 	struct request_queue *admin_q;
+	struct request_queue *io_q;
 	struct blk_mq_tag_set tagset;
 	struct blk_mq_tag_set admin_tagset;
 	u32 __iomem *dbs;
-- 
1.7.10.4

* [PATCH 3/4] NVMe: Automatic namespace rescan
  2015-05-18 19:30 [PATCH 0/4] Namespace handling updates and fixes Keith Busch
  2015-05-18 19:30 ` [PATCH 1/4] blk: Define blk_set_queue_dying in header Keith Busch
  2015-05-18 19:30 ` [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue Keith Busch
@ 2015-05-18 19:30 ` Keith Busch
  2015-05-18 19:30 ` [PATCH 4/4] NVMe: Remove disks before shutdown Keith Busch
  3 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2015-05-18 19:30 UTC (permalink / raw)


This has the driver rescan the device for namespace changes after each
device reset or namespace-change asynchronous event. Namespaces may be
dynamically allocated and deleted, or attached and detached. There could
potentially be many of these, and we don't want them polluting /dev/
with unusable block handles, so this deletes the disk if the namespace
is not active, as indicated by the response to Identify Namespace. It
also skips adding the disk in the first place if no capacity is
assigned to the namespace.
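
For reference, the AER completion result decodes as follows (my reading
of the spec; nvme_aer_is_ns_change() below is illustrative, not part of
the patch):

/* Async event result: bits 2:0 are the event type (0x2 == notice),
 * bits 15:8 the event info (0x00 == namespace attribute changed),
 * bits 23:16 the associated log page.  Masking with 0xff07 keeps
 * type + info, so NVME_AER_NOTICE_NS_CHANGED == 0x0002. */
static bool nvme_aer_is_ns_change(u32 result)
{
	return (result & 0xff07) == NVME_AER_NOTICE_NS_CHANGED;
}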

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |  158 +++++++++++++++++++++++++++++++++------------
 include/linux/nvme.h      |    1 +
 include/uapi/linux/nvme.h |    4 ++
 3 files changed, 123 insertions(+), 40 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index bd36d34..767ea54 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -300,9 +300,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
 		++nvmeq->dev->event_limit;
-	if (status == NVME_SC_SUCCESS)
-		dev_warn(nvmeq->q_dmadev,
-			"async event result %08x\n", result);
+	if (status != NVME_SC_SUCCESS)
+		return;
+
+	switch (result & 0xff07) {
+	case NVME_AER_NOTICE_NS_CHANGED:
+		dev_info(nvmeq->q_dmadev, "rescanning\n");
+		schedule_work(&nvmeq->dev->scan_work);
+	default:
+		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+	}
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1990,11 +1997,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 		return 0;
 	}
 	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
-		dev_warn(&dev->pci_dev->dev,
-			"identify failed ns:%d, setting capacity to 0\n",
-			ns->ns_id);
+		dev_warn(&dev->pci_dev->dev, "identify failed ns:%d\n",
+								ns->ns_id);
 		memset(id, 0, sizeof(*id));
 	}
+	if (id->ncap == 0) {
+		dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+		return -ENODEV;
+	}
 
 	old_ms = ns->ms;
 	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
@@ -2027,7 +2037,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								!ns->ext)
 		nvme_init_integrity(ns);
 
-	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+	if (ns->ms && !blk_get_integrity(disk))
 		set_capacity(disk, 0);
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2135,18 +2145,17 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	disk->flags = GENHD_FL_EXT_DEVT;
 	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
 
-	/*
-	 * Initialize capacity to 0 until we establish the namespace format and
-	 * setup integrity extentions if necessary. The revalidate_disk after
-	 * add_disk allows the driver to register with integrity if the format
-	 * requires it.
-	 */
-	set_capacity(disk, 0);
-	nvme_revalidate_disk(ns->disk);
+	if (nvme_revalidate_disk(ns->disk))
+		goto out_free_disk;
+
 	add_disk(ns->disk);
 	if (ns->ms)
 		revalidate_disk(ns->disk);
 	return;
+
+ out_free_disk:
+	kfree(disk);
+	list_del(&ns->list);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -2264,6 +2273,91 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return result;
 }
 
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	ns->disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(ns->disk);
+	kfree(ns);
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list)
+		if (ns->ns_id == nsid)
+			return ns;
+	return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+	return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+							dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+	if (kill)
+		blk_set_queue_dying(ns->queue);
+	if (ns->disk->flags & GENHD_FL_UP) {
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
+		del_gendisk(ns->disk);
+	}
+	if (kill || !blk_queue_dying(ns->queue)) {
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_cleanup_queue(ns->queue);
+	}
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (ns) {
+			if (revalidate_disk(ns->disk)) {
+				nvme_ns_remove(ns);
+				nvme_free_namespace(ns);
+			}
+		} else
+			nvme_alloc_ns(dev, i);
+	}
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+	unsigned nn;
+	void *mem;
+	struct nvme_id_ctrl *ctrl;
+	dma_addr_t dma_addr;
+
+	if (IS_ERR_OR_NULL(dev->io_q))
+		return;
+
+	mem = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, GFP_KERNEL);
+	if (!mem)
+		return;
+	if (nvme_identify(dev, 0, 1, dma_addr))
+		goto free;
+	ctrl = mem;
+	nn = le32_to_cpup(&ctrl->nn);
+	nvme_scan_namespaces(dev, nn);
+ free:
+	dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -2274,7 +2368,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = dev->pci_dev;
 	int res;
-	unsigned nn, i;
+	unsigned nn;
 	struct nvme_id_ctrl *ctrl;
 	void *mem;
 	dma_addr_t dma_addr;
@@ -2339,9 +2433,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		return 0;
 	}
 
-	for (i = 1; i <= nn; i++)
-		nvme_alloc_ns(dev, i);
-
+	nvme_scan_namespaces(dev, nn);
 	return 0;
 }
 
@@ -2639,17 +2731,8 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
 
-	list_for_each_entry(ns, &dev->namespaces, list) {
-		if (ns->disk->flags & GENHD_FL_UP) {
-			if (blk_get_integrity(ns->disk))
-				blk_integrity_unregister(ns->disk);
-			del_gendisk(ns->disk);
-		}
-		if (!blk_queue_dying(ns->queue)) {
-			blk_mq_abort_requeue_list(ns->queue);
-			blk_cleanup_queue(ns->queue);
-		}
-	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		nvme_ns_remove(ns);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2709,16 +2792,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
 
-	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-		list_del(&ns->list);
-
-		spin_lock(&dev_list_lock);
-		ns->disk->private_data = NULL;
-		spin_unlock(&dev_list_lock);
-
-		put_disk(ns->disk);
-		kfree(ns);
-	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+		nvme_free_namespace(ns);
 }
 
 static void nvme_free_dev(struct kref *kref)
@@ -2903,6 +2978,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		spin_unlock(&dev_list_lock);
 	} else {
 		nvme_unfreeze_queues(dev);
+		schedule_work(&dev->scan_work);
 		nvme_set_irq_hints(dev);
 	}
 	return 0;
@@ -2981,6 +3057,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	get_device(dev->device);
 
 	INIT_LIST_HEAD(&dev->node);
+	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
@@ -3047,6 +3124,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
+	flush_work(&dev->scan_work);
 	flush_work(&dev->reset_work);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 013c38f..409d191 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -93,6 +93,7 @@ struct nvme_dev {
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
+	struct work_struct scan_work;
 	char name[12];
 	char serial[20];
 	char model[40];
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index aef9a81..11f9cdc 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -179,6 +179,10 @@ enum {
 	NVME_SMART_CRIT_VOLATILE_MEMORY	= 1 << 4,
 };
 
+enum {
+	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
+};
+
 struct nvme_lba_range_type {
 	__u8			type;
 	__u8			attributes;
-- 
1.7.10.4

* [PATCH 4/4] NVMe: Remove disks before shutdown
  2015-05-18 19:30 [PATCH 0/4] Namespace handling updates and fixes Keith Busch
                   ` (2 preceding siblings ...)
  2015-05-18 19:30 ` [PATCH 3/4] NVMe: Automatic namespace rescan Keith Busch
@ 2015-05-18 19:30 ` Keith Busch
  3 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2015-05-18 19:30 UTC (permalink / raw)


On orderly removal, we need to shut down the controller after the disks
are deleted to prevent a deadlock on filesystem sync: deleting a mounted
disk forces a final sync, which can only complete while the controller
is still able to service I/O.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 767ea54..62a147d 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -2960,8 +2960,8 @@ static void nvme_remove_disks(struct work_struct *ws)
 {
 	struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
 
-	nvme_free_queues(dev, 1);
 	nvme_dev_remove(dev);
+	nvme_free_queues(dev, 1);
 }
 
 static int nvme_dev_resume(struct nvme_dev *dev)
@@ -3126,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev)
 	flush_work(&dev->probe_work);
 	flush_work(&dev->scan_work);
 	flush_work(&dev->reset_work);
-	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
+	nvme_dev_shutdown(dev);
 	nvme_dev_remove_admin(dev);
 	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
 	nvme_free_queues(dev, 0);
-- 
1.7.10.4

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-18 19:30 ` [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue Keith Busch
@ 2015-05-19  7:46   ` Christoph Hellwig
  2015-05-19 15:35     ` Keith Busch
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2015-05-19  7:46 UTC (permalink / raw)


On Mon, May 18, 2015 at 01:30:21PM -0600, Keith Busch wrote:
> In preparation for namespaces dynamically attaching and detaching, we
> can't tie an nvmeq's hctx to a namespace request queue: the namespace
> may be removed at any time. This patch has the driver create an IO
> request queue that is unaffiliated with any namespace, so we always
> have a valid hctx for the IO queues.

I don't think this is correct.  The better fix is to stop storing
the hctx (or rather, a hctx) in the nvmeq.  The current code only works
by chance, as it just dereferences the tags pointer in the hctx.  So
just add a helper to find the right struct blk_mq_tags for a queue index
and change the driver to use that.
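
Something along these lines, perhaps (untested; the helper name is
made up):

static struct blk_mq_tags *nvme_queue_tags(struct nvme_dev *dev,
					   unsigned int qid)
{
	/* qid 0 is the admin queue with its own tag set; IO queues
	 * start at 1 and map to hw queue index qid - 1. */
	if (!qid)
		return dev->admin_tagset.tags[0];
	return dev->tagset.tags[qid - 1];
}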

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-19  7:46   ` Christoph Hellwig
@ 2015-05-19 15:35     ` Keith Busch
  2015-05-19 15:51       ` Keith Busch
  2015-05-19 16:14       ` Christoph Hellwig
  0 siblings, 2 replies; 10+ messages in thread
From: Keith Busch @ 2015-05-19 15:35 UTC (permalink / raw)


On Tue, 19 May 2015, Christoph Hellwig wrote:
> On Mon, May 18, 2015 at 01:30:21PM -0600, Keith Busch wrote:
>> In preparation for namespaces dynamically attaching and detaching, we
>> can't tie an nvmeq's hctx to a namespace request queue: the namespace
>> may be removed at any time. This patch has the driver create an IO
>> request queue that is unaffiliated with any namespace, so we always
>> have a valid hctx for the IO queues.
>
> I don't think this is correct.  The better fix is to stop storing
> the hctx (or rather, a hctx) in the nvmeq.  The current code only works
> by chance, as it just dereferences the tags pointer in the hctx.  So
> just add a helper to find the right struct blk_mq_tags for a queue index
> and change the driver to use that.

Agreed, this isn't the ideal fix. I'm just trying to get this working
in the short term, and the hctx is the most convenient thing to use for
a couple of reasons: it has the cpumask required for the irq affinity
hints, and the blk-mq layer takes an hctx to iterate through busy tags.
And I clearly don't want a h/w IO queue tied to a dynamic namespace's
request_queue's hctx.
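
Roughly, these are the two consumers today (paraphrased from the driver
and untested; nvme_cancel_queue_ios is the existing abort callback):

static void nvme_hctx_consumers(struct nvme_dev *dev,
				struct nvme_queue *nvmeq)
{
	/* 1) IRQ affinity hints want the hctx's cpumask */
	irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
			      nvmeq->hctx->cpumask);

	/* 2) cancelling outstanding I/O walks busy tags via an hctx */
	blk_mq_tag_busy_iter(nvmeq->hctx, nvme_cancel_queue_ios, nvmeq);
}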

I'm actually not sure why every request queue has its own hardware
context. Shouldn't it be an artifact of the tagset rather than the
request queue? I thought I might have a chance of intercepting a late
4.1 rc if I didn't mess with the block layer, though.

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-19 15:35     ` Keith Busch
@ 2015-05-19 15:51       ` Keith Busch
  2015-05-19 16:15         ` Christoph Hellwig
  2015-05-19 16:14       ` Christoph Hellwig
  1 sibling, 1 reply; 10+ messages in thread
From: Keith Busch @ 2015-05-19 15:51 UTC (permalink / raw)


On Tue, 19 May 2015, Keith Busch wrote:
> <snip> and the blk-mq layer takes an hctx to iterate through busy tags

> I'm actually not sure why every request queue has its own hardware
> context. Shouldn't it be an artifact of the tagset rather than the
> request queue? I thought I might have a chance of intercepting a late
> 4.1 rc if I didn't mess with the block layer, though.

Oh no! The blk_mq_tag_busy_iter usage is broken in the nvme driver for
anything with more than one namespace, and broken with the implementation
I suggested. It only invokes the callback if the hctx is associated with
the same request queue. Must fix; maybe add a blk-mq API to iterate the
tagset rather than the hctx.
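
Something like this, maybe (untested sketch; both helpers would be new,
and the per-tags iterator body is elided):

typedef void (busy_tag_iter_fn)(struct request *rq, void *priv,
				bool reserved);

/* New: iterate the busy requests of one blk_mq_tags, without needing
 * any hctx/request_queue.  Body would mirror blk_mq_tag_busy_iter()
 * minus the hctx->queue check. */
void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags,
			      busy_tag_iter_fn *fn, void *priv);

/* New: walk every busy request in a whole tag set. */
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *set,
			     busy_tag_iter_fn *fn, void *priv)
{
	int i;

	for (i = 0; i < set->nr_hw_queues; i++) {
		if (set->tags[i])
			blk_mq_all_tag_busy_iter(set->tags[i], fn, priv);
	}
}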

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-19 15:35     ` Keith Busch
  2015-05-19 15:51       ` Keith Busch
@ 2015-05-19 16:14       ` Christoph Hellwig
  1 sibling, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-05-19 16:14 UTC (permalink / raw)


On Tue, May 19, 2015 at 03:35:19PM +0000, Keith Busch wrote:
> I'm actually not sure why every request queue has its own hardware
> context. Shouldn't it be an artifact of the tagset rather than the
> request queue?

That might make sense, but separating the hctx from the queue will
involve a fair amount of work.

* [PATCH 2/4] NVMe: Decouple nvmeq hctx from ns request queue
  2015-05-19 15:51       ` Keith Busch
@ 2015-05-19 16:15         ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2015-05-19 16:15 UTC (permalink / raw)


On Tue, May 19, 2015 at 03:51:32PM +0000, Keith Busch wrote:
> Oh no! The blk_mq_tag_busy_iter usage is broken in the nvme driver for
> anything with more than one namespace, and broken with the implementation
> I suggested. It only invokes the callback if the hctx is associated with
> the same request queue. Must fix; maybe add a blk-mq API to iterate the
> tagset rather than the hctx.

Yes, similar to how we pass a tag_set to blk_mq_tag_to_rq.
