* [PATCHv5 0/6] NVMe: suspend resume
From: Keith Busch @ 2013-07-15 21:02 UTC


... yet another version.

v4->v5:

Renamed 'nvme_free_queue_mem' to 'nvme_free_queue' since it now does more
than simply free the queue's memory; the bio_list is cleared here instead
of when disabling the queue, since nothing appears to stop IO from being
submitted while the device is being suspended. The driver should be able
to complete those bio requests after resume, so we clear the bio_list only
when we don't expect the device to start handling IO again.

Moved the check for freeing previously allocated IO queues that are
no longer usable to the 'suspend/resume' patch since it would not be
possible to hit that code in the patch where it was initially added.

Added the patch to handle ioremap failure at the end of this change set,
since we can use the newly added q_suspended flag to decide whether or not
to call free_irq on the admin queue on initialization failure.
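
A rough sketch of that interaction, condensed from patches 2 and 6 -- the
flag keeps the teardown path from freeing an IRQ that was never
re-registered:

	/* nvme_setup_io_queues(): if re-registering the admin queue's
	 * interrupt fails, mark the queue suspended before bailing out. */
	result = queue_request_irq(dev, dev->queues[0], "nvme admin");
	if (result) {
		dev->queues[0]->q_suspended = 1;
		goto free_queues;
	}

	/* nvme_disable_queue(): a queue already marked suspended is left
	 * alone, so its IRQ is not freed a second time. */
	spin_lock_irq(&nvmeq->q_lock);
	if (nvmeq->q_suspended) {
		spin_unlock_irq(&nvmeq->q_lock);
		return;
	}
	nvmeq->q_suspended = 1;
	spin_unlock_irq(&nvmeq->q_lock);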

Fixed a previous merge error that set the DMA coherent mask twice.

Fixed a few checkpatch issues.

Keith Busch (6):
  NVMe: Group pci related actions in functions
  NVMe: Separate queue alloc/free from create/delete
  NVMe: Separate controller init from disk discovery
  NVMe: Use normal shutdown
  NVMe: Add pci suspend/resume driver callbacks
  NVMe: Handle ioremap failure

 drivers/block/nvme-core.c |  437 +++++++++++++++++++++++++++++++--------------
 include/linux/nvme.h      |    2 +
 2 files changed, 308 insertions(+), 131 deletions(-)
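
For reference, the end state wired up by patch 5 looks roughly like this
(a condensed sketch; the full version is in that patch):

static int nvme_suspend(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));

	nvme_dev_shutdown(ndev);	/* quiesce queues, shutdown controller, unmap */
	return 0;
}

static int nvme_resume(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
	int ret = nvme_dev_start(ndev);	/* remap, re-init admin and IO queues */

	if (ret)
		nvme_free_queues(ndev);
	return ret;
}

SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);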

* [PATCHv5 1/6] NVMe: Group pci related actions in functions
From: Keith Busch @ 2013-07-15 21:02 UTC


This will make it easier to reuse these functions outside of probe/remove.
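
For example, the controller start/shutdown paths introduced later in this
series (patch 3) end up pairing them roughly like so (sketch only):

static int nvme_dev_start(struct nvme_dev *dev)
{
	int result = nvme_dev_map(dev);	/* enable device, request regions, map BAR */

	if (result)
		return result;
	/* ... admin queue and IO queue setup ... */
	return 0;
}

static void nvme_dev_shutdown(struct nvme_dev *dev)
{
	/* ... quiesce and disable the queues ... */
	nvme_dev_unmap(dev);		/* unmap BAR, release regions, disable device */
}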

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |  112 ++++++++++++++++++++++++++------------------
 1 files changed, 66 insertions(+), 46 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7de80bb..678f505 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1194,9 +1194,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u64 cap = readq(&dev->bar->cap);
 	struct nvme_queue *nvmeq;
 
-	dev->dbs = ((void __iomem *)dev->bar) + 4096;
-	dev->db_stride = NVME_CAP_STRIDE(cap);
-
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
 		return result;
@@ -1834,6 +1831,61 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	return res;
 }
 
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+	int bars, result = -ENOMEM;
+	struct pci_dev *pdev = dev->pci_dev;
+
+	if (pci_enable_device_mem(pdev))
+		return result;
+
+	dev->entry[0].vector = pdev->irq;
+	pci_set_master(pdev);
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (pci_request_selected_regions(pdev, bars, "nvme"))
+		goto disable_pci;
+
+	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+	else
+		goto disable_pci;
+
+	pci_set_drvdata(pdev, dev);
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar)
+		goto disable;
+
+	dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
+	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+	return 0;
+
+ disable:
+	pci_release_regions(pdev);
+ disable_pci:
+	pci_disable_device(pdev);
+	return result;
+}
+
+static void nvme_dev_unmap(struct nvme_dev *dev)
+{
+	if (dev->pci_dev->msi_enabled)
+		pci_disable_msi(dev->pci_dev);
+	else if (dev->pci_dev->msix_enabled)
+		pci_disable_msix(dev->pci_dev);
+
+	if (dev->bar) {
+		iounmap(dev->bar);
+		dev->bar = NULL;
+	}
+
+	pci_release_regions(dev->pci_dev);
+	if (pci_is_enabled(dev->pci_dev))
+		pci_disable_device(dev->pci_dev);
+}
+
 static int nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
@@ -1910,15 +1962,9 @@ static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 	nvme_dev_remove(dev);
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
-	iounmap(dev->bar);
+	nvme_dev_unmap(dev);
 	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
-	pci_disable_device(dev->pci_dev);
-	pci_release_regions(dev->pci_dev);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -1961,7 +2007,7 @@ static const struct file_operations nvme_dev_fops = {
 
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	int bars, result = -ENOMEM;
+	int result = -ENOMEM;
 	struct nvme_dev *dev;
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1976,39 +2022,19 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!dev->queues)
 		goto free;
 
-	if (pci_enable_device_mem(pdev))
-		goto free;
-	pci_set_master(pdev);
-	bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	if (pci_request_selected_regions(pdev, bars, "nvme"))
-		goto disable;
-
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->pci_dev = pdev;
-	pci_set_drvdata(pdev, dev);
-
-	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
-		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-	else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
-		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
-	else
-		goto disable;
-
 	result = nvme_set_instance(dev);
 	if (result)
-		goto disable;
-
-	dev->entry[0].vector = pdev->irq;
+		goto free;
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)
-		goto disable_msix;
+		goto release;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar) {
-		result = -ENOMEM;
-		goto disable_msix;
-	}
+	result = nvme_dev_map(dev);
+	if (result)
+		goto release_pools;
 
 	result = nvme_configure_admin_queue(dev);
 	if (result)
@@ -2044,17 +2070,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	nvme_free_queues(dev);
  unmap:
-	iounmap(dev->bar);
- disable_msix:
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
-	nvme_release_instance(dev);
+	nvme_dev_unmap(dev);
+ release_pools:
 	nvme_release_prp_pools(dev);
- disable:
-	pci_disable_device(pdev);
-	pci_release_regions(pdev);
+ release:
+	nvme_release_instance(dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
-- 
1.7.0.4

* [PATCHv5 2/6] NVMe: Separate queue alloc/free from create/delete
From: Keith Busch @ 2013-07-15 21:02 UTC


This separates nvme queue allocation from creation, and queue deletion
from freeing. This is so that we may, in the future, temporarily disable
queues and reuse the same memory when bringing them back online, such as
when coming back from suspend.
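
A sketch of the queue lifecycle this split is aiming for (the
suspend/resume user arrives later in the series):

	/*
	 * nvme_alloc_queue()   - allocate SQ/CQ DMA memory, start suspended
	 * nvme_create_queue()  - create the queue pair on the controller,
	 *                        request its IRQ; nvme_init_queue() clears
	 *                        q_suspended
	 * nvme_disable_queue() - delete the queue pair on the controller,
	 *                        reap outstanding commands, set q_suspended
	 * nvme_create_queue()  - can run again on the same memory, e.g.
	 *                        when resuming from a low power state
	 * nvme_free_queue()    - release the DMA memory and fail any bios
	 *                        still sitting on sq_cong
	 */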

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |  133 ++++++++++++++++++++++++++++++++-------------
 1 files changed, 94 insertions(+), 39 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 678f505..6378e2d 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -82,6 +82,7 @@ struct nvme_queue {
 	u16 cq_head;
 	u8 cq_phase;
 	u8 cqe_seen;
+	u8 q_suspended;
 	unsigned long cmdid_data[];
 };
 
@@ -117,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
 	return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
 }
 
+static unsigned nvme_queue_extra(int depth)
+{
+	return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
+}
+
 /**
  * alloc_cmdid() - Allocate a Command ID
  * @nvmeq: The queue that will be used for this command
@@ -787,7 +793,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
 	int result = -EBUSY;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	if (bio_list_empty(&nvmeq->sq_cong))
+	if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
 		result = nvme_submit_bio_queue(nvmeq, ns, bio);
 	if (unlikely(result)) {
 		if (bio_list_empty(&nvmeq->sq_cong))
@@ -1021,8 +1027,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 	}
 }
 
-static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
+	spin_lock_irq(&nvmeq->q_lock);
+	while (bio_list_peek(&nvmeq->sq_cong)) {
+		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
+		bio_endio(bio, -EIO);
+	}
+	spin_unlock_irq(&nvmeq->q_lock);
+
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
 	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
@@ -1030,17 +1043,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
 	kfree(nvmeq);
 }
 
-static void nvme_free_queue(struct nvme_dev *dev, int qid)
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--) {
+		nvme_free_queue(dev->queues[i]);
+		dev->queue_count--;
+		dev->queues[i] = NULL;
+	}
+}
+
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 {
 	struct nvme_queue *nvmeq = dev->queues[qid];
 	int vector = dev->entry[nvmeq->cq_vector].vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_cancel_ios(nvmeq, false);
-	while (bio_list_peek(&nvmeq->sq_cong)) {
-		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
-		bio_endio(bio, -EIO);
+	if (nvmeq->q_suspended) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return;
 	}
+	nvmeq->q_suspended = 1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1052,15 +1076,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_cq(dev, qid);
 	}
 
-	nvme_free_queue_mem(nvmeq);
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth, int vector)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
-	unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
-						sizeof(struct nvme_cmd_info));
+	unsigned extra = nvme_queue_extra(depth);
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
@@ -1087,6 +1113,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
+	nvmeq->q_suspended = 1;
+	dev->queue_count++;
 
 	return nvmeq;
 
@@ -1110,18 +1138,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
 }
 
-static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
-					    int cq_size, int vector)
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 {
-	int result;
-	struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+	struct nvme_dev *dev = nvmeq->dev;
+	unsigned extra = nvme_queue_extra(nvmeq->q_depth);
 
-	if (!nvmeq)
-		return ERR_PTR(-ENOMEM);
+	nvmeq->sq_tail = 0;
+	nvmeq->cq_head = 0;
+	nvmeq->cq_phase = 1;
+	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+	memset(nvmeq->cmdid_data, 0, extra);
+	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+	nvme_cancel_ios(nvmeq, false);
+	nvmeq->q_suspended = 0;
+}
+
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+	struct nvme_dev *dev = nvmeq->dev;
+	int result;
 
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
-		goto free_nvmeq;
+		return result;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
@@ -1131,19 +1170,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
 	if (result < 0)
 		goto release_sq;
 
-	return nvmeq;
+	spin_lock(&nvmeq->q_lock);
+	nvme_init_queue(nvmeq, qid);
+	spin_unlock(&nvmeq->q_lock);
+
+	return result;
 
  release_sq:
 	adapter_delete_sq(dev, qid);
  release_cq:
 	adapter_delete_cq(dev, qid);
- free_nvmeq:
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-	kfree(nvmeq);
-	return ERR_PTR(result);
+	return result;
 }
 
 static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
@@ -1224,10 +1261,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 		goto free_q;
 
 	dev->queues[0] = nvmeq;
+	spin_lock(&nvmeq->q_lock);
+	nvme_init_queue(nvmeq, 0);
+	spin_unlock(&nvmeq->q_lock);
 	return result;
 
  free_q:
-	nvme_free_queue_mem(nvmeq);
+	nvme_free_queue(nvmeq);
 	return result;
 }
 
@@ -1388,6 +1428,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	put_nvmeq(nvmeq);
 	if (length != (io.nblocks + 1) << ns->lba_shift)
 		status = -ENOMEM;
+	else if (!nvmeq || nvmeq->q_suspended)
+		status = -EBUSY;
 	else
 		status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
 
@@ -1539,9 +1581,12 @@ static int nvme_kthread(void *data)
 				if (!nvmeq)
 					continue;
 				spin_lock_irq(&nvmeq->q_lock);
+				if (nvmeq->q_suspended)
+					goto unlock;
 				nvme_process_cq(nvmeq);
 				nvme_cancel_ios(nvmeq, true);
 				nvme_resubmit_bios(nvmeq);
+ unlock:
 				spin_unlock_irq(&nvmeq->q_lock);
 			}
 		}
@@ -1727,7 +1772,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	nr_io_queues = vecs;
 
 	result = queue_request_irq(dev, dev->queues[0], "nvme admin");
-	/* XXX: handle failure here */
+	if (result)
+		goto free_queues;
 
 	cpu = cpumask_first(cpu_online_mask);
 	for (i = 0; i < nr_io_queues; i++) {
@@ -1738,10 +1784,11 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
 								NVME_Q_DEPTH);
 	for (i = 0; i < nr_io_queues; i++) {
-		dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
-		if (IS_ERR(dev->queues[i + 1]))
-			return PTR_ERR(dev->queues[i + 1]);
-		dev->queue_count++;
+		dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
+		if (!dev->queues[i + 1]) {
+			result = -ENOMEM;
+			goto free_queues;
+		}
 	}
 
 	for (; i < num_possible_cpus(); i++) {
@@ -1749,15 +1796,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		dev->queues[i + 1] = dev->queues[target + 1];
 	}
 
-	return 0;
-}
+	for (i = 1; i < dev->queue_count; i++) {
+		result = nvme_create_queue(dev->queues[i], i);
+		if (result) {
+			for (--i; i > 0; i--)
+				nvme_disable_queue(dev, i);
+			goto free_queues;
+		}
+	}
 
-static void nvme_free_queues(struct nvme_dev *dev)
-{
-	int i;
+	return 0;
 
-	for (i = dev->queue_count - 1; i >= 0; i--)
-		nvme_free_queue(dev, i);
+ free_queues:
+	nvme_free_queues(dev);
+	return result;
 }
 
 /*
@@ -1889,6 +1941,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 static int nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_disable_queue(dev, i);
 
 	spin_lock(&dev_list_lock);
 	list_del(&dev->node);
@@ -2039,7 +2095,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	result = nvme_configure_admin_queue(dev);
 	if (result)
 		goto unmap;
-	dev->queue_count++;
 
 	spin_lock(&dev_list_lock);
 	list_add(&dev->node, &dev_list);
-- 
1.7.0.4

* [PATCHv5 3/6] NVMe: Separate controller init from disk discovery
From: Keith Busch @ 2013-07-15 21:02 UTC


This combines the controller initialization into one function, removes IO
queue setup from namespace discovery, and creates symmetric functions for
device removal. The controller start and shutdown functions can now be
called from resume/suspend context as well as probe/remove.
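
The resulting symmetry, roughly:

	/*
	 * probe:   nvme_dev_start()    - map BAR, set up admin and IO queues
	 *          nvme_dev_add()      - discover namespaces, add gendisks
	 *
	 * remove:  nvme_dev_remove()   - tear down gendisks and namespaces
	 *          nvme_dev_shutdown() - quiesce queues, unmap BAR
	 *
	 * suspend/resume (added later in the series) use only the
	 * start/shutdown half, leaving the gendisks in place.
	 */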

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   77 +++++++++++++++++++++++++++-----------------
 1 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 6378e2d..78247e5 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1829,10 +1829,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	res = nvme_setup_io_queues(dev);
-	if (res)
-		return res;
-
 	mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
 								GFP_KERNEL);
 	if (!mem)
@@ -1938,27 +1934,29 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 		pci_disable_device(dev->pci_dev);
 }
 
-static int nvme_dev_remove(struct nvme_dev *dev)
+static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
-	struct nvme_ns *ns, *next;
 	int i;
 
 	for (i = dev->queue_count - 1; i >= 0; i--)
 		nvme_disable_queue(dev, i);
 
 	spin_lock(&dev_list_lock);
-	list_del(&dev->node);
+	list_del_init(&dev->node);
 	spin_unlock(&dev_list_lock);
 
+	nvme_dev_unmap(dev);
+}
+
+static void nvme_dev_remove(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns, *next;
+
 	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
 		list_del(&ns->list);
 		del_gendisk(ns->disk);
 		nvme_ns_free(ns);
 	}
-
-	nvme_free_queues(dev);
-
-	return 0;
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2018,7 +2016,8 @@ static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 	nvme_dev_remove(dev);
-	nvme_dev_unmap(dev);
+	nvme_dev_shutdown(dev);
+	nvme_free_queues(dev);
 	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
 	kfree(dev->queues);
@@ -2061,6 +2060,37 @@ static const struct file_operations nvme_dev_fops = {
 	.compat_ioctl	= nvme_dev_ioctl,
 };
 
+static int nvme_dev_start(struct nvme_dev *dev)
+{
+	int result;
+
+	result = nvme_dev_map(dev);
+	if (result)
+		return result;
+
+	result = nvme_configure_admin_queue(dev);
+	if (result)
+		goto unmap;
+
+	spin_lock(&dev_list_lock);
+	list_add(&dev->node, &dev_list);
+	spin_unlock(&dev_list_lock);
+
+	result = nvme_setup_io_queues(dev);
+	if (result)
+		goto disable;
+
+	return 0;
+
+ disable:
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+ unmap:
+	nvme_dev_unmap(dev);
+	return result;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int result = -ENOMEM;
@@ -2088,21 +2118,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release;
 
-	result = nvme_dev_map(dev);
+	result = nvme_dev_start(dev);
 	if (result)
 		goto release_pools;
 
-	result = nvme_configure_admin_queue(dev);
-	if (result)
-		goto unmap;
-
-	spin_lock(&dev_list_lock);
-	list_add(&dev->node, &dev_list);
-	spin_unlock(&dev_list_lock);
-
 	result = nvme_dev_add(dev);
 	if (result)
-		goto delete;
+		goto shutdown;
 
 	scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
 	dev->miscdev.minor = MISC_DYNAMIC_MINOR;
@@ -2118,15 +2140,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
  remove:
 	nvme_dev_remove(dev);
- delete:
-	spin_lock(&dev_list_lock);
-	list_del(&dev->node);
-	spin_unlock(&dev_list_lock);
-
-	nvme_free_queues(dev);
- unmap:
-	nvme_dev_unmap(dev);
+ shutdown:
+	nvme_dev_shutdown(dev);
  release_pools:
+	nvme_free_queues(dev);
 	nvme_release_prp_pools(dev);
  release:
 	nvme_release_instance(dev);
-- 
1.7.0.4

* [PATCHv5 4/6] NVMe: Use normal shutdown
From: Keith Busch @ 2013-07-15 21:02 UTC


The NVMe spec recommends using the normal shutdown sequence when safely
taking the controller offline, rather than relying on hitting CC.EN on the
next start-up to reset the controller. The spec recommends a minimum of
1 second for the shutdown to complete. This patch waits 2 seconds to be on
the safe side.
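
In register terms, this sets CC.SHN to 01b (normal shutdown) and polls
CSTS.SHST until it reads 10b (shutdown complete) or the two-second timeout
expires; the new NVME_CC_SHN_MASK and NVME_CSTS_SHST_MASK definitions cover
those two-bit fields at bits 15:14 and 3:2 respectively.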

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   26 ++++++++++++++++++++++++++
 include/linux/nvme.h      |    2 ++
 2 files changed, 28 insertions(+), 0 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 78247e5..bf002a8 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1224,6 +1224,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
 	return nvme_wait_ready(dev, cap, true);
 }
 
+static int nvme_shutdown_ctrl(struct nvme_dev *dev)
+{
+	unsigned long timeout;
+	u32 cc;
+
+	cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
+	writel(cc, &dev->bar->cc);
+
+	timeout = 2 * HZ + jiffies;
+	while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
+							NVME_CSTS_SHST_CMPLT) {
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(&dev->pci_dev->dev,
+				"Device shutdown incomplete; abort shutdown\n");
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1945,6 +1969,8 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	list_del_init(&dev->node);
 	spin_unlock(&dev_list_lock);
 
+	if (dev->bar)
+		nvme_shutdown_ctrl(dev);
 	nvme_dev_unmap(dev);
 }
 
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3403c8f..26ebcf4 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -53,6 +53,7 @@ enum {
 	NVME_CC_SHN_NONE	= 0 << 14,
 	NVME_CC_SHN_NORMAL	= 1 << 14,
 	NVME_CC_SHN_ABRUPT	= 2 << 14,
+	NVME_CC_SHN_MASK	= 3 << 14,
 	NVME_CC_IOSQES		= 6 << 16,
 	NVME_CC_IOCQES		= 4 << 20,
 	NVME_CSTS_RDY		= 1 << 0,
@@ -60,6 +61,7 @@ enum {
 	NVME_CSTS_SHST_NORMAL	= 0 << 2,
 	NVME_CSTS_SHST_OCCUR	= 1 << 2,
 	NVME_CSTS_SHST_CMPLT	= 2 << 2,
+	NVME_CSTS_SHST_MASK	= 3 << 2,
 };
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
-- 
1.7.0.4

* [PATCHv5 5/6] NVMe: Add pci suspend/resume driver callbacks
From: Keith Busch @ 2013-07-15 21:02 UTC


These callbacks are used for going in and out of low power states. Resume
reuses the IO queues from the previous initialization, freeing any
allocated queues that are no longer usable.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   73 +++++++++++++++++++++++++++++++++++---------
 1 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index bf002a8..9649844 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -792,6 +792,12 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
 	struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
 	int result = -EBUSY;
 
+	if (!nvmeq) {
+		put_nvmeq(NULL);
+		bio_endio(bio, -EIO);
+		return;
+	}
+
 	spin_lock_irq(&nvmeq->q_lock);
 	if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
 		result = nvme_submit_bio_queue(nvmeq, ns, bio);
@@ -1259,9 +1265,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
-	if (!nvmeq)
-		return -ENOMEM;
+	nvmeq = dev->queues[0];
+	if (!nvmeq) {
+		nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+		if (!nvmeq)
+			return -ENOMEM;
+		dev->queues[0] = nvmeq;
+	}
 
 	aqa = nvmeq->q_depth - 1;
 	aqa |= aqa << 16;
@@ -1278,21 +1288,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	result = nvme_enable_ctrl(dev, cap);
 	if (result)
-		goto free_q;
+		return result;
 
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
 	if (result)
-		goto free_q;
+		return result;
 
-	dev->queues[0] = nvmeq;
 	spin_lock(&nvmeq->q_lock);
 	nvme_init_queue(nvmeq, 0);
 	spin_unlock(&nvmeq->q_lock);
 	return result;
-
- free_q:
-	nvme_free_queue(nvmeq);
-	return result;
 }
 
 struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
@@ -1799,6 +1804,21 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (result)
 		goto free_queues;
 
+	/* Free previously allocated queues that are no longer usable */
+	spin_lock(&dev_list_lock);
+	for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
+		struct nvme_queue *nvmeq = dev->queues[i];
+
+		spin_lock(&nvmeq->q_lock);
+		nvme_cancel_ios(nvmeq, false);
+		spin_unlock(&nvmeq->q_lock);
+
+		nvme_free_queue(nvmeq);
+		dev->queue_count--;
+		dev->queues[i] = NULL;
+	}
+	spin_unlock(&dev_list_lock);
+
 	cpu = cpumask_first(cpu_online_mask);
 	for (i = 0; i < nr_io_queues; i++) {
 		irq_set_affinity_hint(dev->entry[i].vector, get_cpu_mask(cpu));
@@ -1807,7 +1827,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 
 	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
 								NVME_Q_DEPTH);
-	for (i = 0; i < nr_io_queues; i++) {
+	for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
 		dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
 		if (!dev->queues[i + 1]) {
 			result = -ENOMEM;
@@ -2193,8 +2213,30 @@ static void nvme_remove(struct pci_dev *pdev)
 #define nvme_link_reset NULL
 #define nvme_slot_reset NULL
 #define nvme_error_resume NULL
-#define nvme_suspend NULL
-#define nvme_resume NULL
+
+static int nvme_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+
+	nvme_dev_shutdown(ndev);
+	return 0;
+}
+
+static int nvme_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = nvme_dev_start(ndev);
+	/* XXX: should remove gendisks if resume fails */
+	if (ret)
+		nvme_free_queues(ndev);
+	return ret;
+}
+
+SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
 
 static const struct pci_error_handlers nvme_err_handler = {
 	.error_detected	= nvme_error_detected,
@@ -2218,8 +2260,9 @@ static struct pci_driver nvme_driver = {
 	.id_table	= nvme_id_table,
 	.probe		= nvme_probe,
 	.remove		= nvme_remove,
-	.suspend	= nvme_suspend,
-	.resume		= nvme_resume,
+	.driver		= {
+		.pm	= &nvme_dev_pm_ops,
+	},
 	.err_handler	= &nvme_err_handler,
 };
 
-- 
1.7.0.4

* [PATCHv5 6/6] NVMe: Handle ioremap failure
From: Keith Busch @ 2013-07-15 21:02 UTC


Decrement the number of queues required for doorbell remapping until the
memory is successfully mapped for that size.

Additional checks are added so that we don't call free_irq on an interrupt
that has already been freed.
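
For example (illustrative numbers only): with CAP.DSTRD = 4, i.e. 64-byte
doorbell spacing, 32 IO queues plus the admin queue need
4096 + 33 * 128 = 8320 bytes of doorbell space, just over the 8192 bytes
mapped at probe time. If that larger ioremap fails, the loop retries with
31 queues (8192 bytes) rather than failing outright.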

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c |   30 ++++++++++++++++++++++--------
 1 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 9649844..608b0a7 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1741,10 +1741,15 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	return min(result & 0xffff, result >> 16) + 1;
 }
 
+static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+	return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+}
+
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = dev->pci_dev;
-	int result, cpu, i, vecs, nr_io_queues, db_bar_size, q_depth;
+	int result, cpu, i, vecs, nr_io_queues, size, q_depth;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1753,17 +1758,24 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (result < nr_io_queues)
 		nr_io_queues = result;
 
-	/* Deregister the admin queue's interrupt */
-	free_irq(dev->entry[0].vector, dev->queues[0]);
-
-	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
-	if (db_bar_size > 8192) {
+	size = db_bar_size(dev, nr_io_queues);
+	if (size > 8192) {
 		iounmap(dev->bar);
-		dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size);
+		do {
+			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+			if (dev->bar)
+				break;
+			if (!--nr_io_queues)
+				return -ENOMEM;
+			size = db_bar_size(dev, nr_io_queues);
+		} while (1);
 		dev->dbs = ((void __iomem *)dev->bar) + 4096;
 		dev->queues[0]->q_db = dev->dbs;
 	}
 
+	/* Deregister the admin queue's interrupt */
+	free_irq(dev->entry[0].vector, dev->queues[0]);
+
 	vecs = nr_io_queues;
 	for (i = 0; i < vecs; i++)
 		dev->entry[i].entry = i;
@@ -1801,8 +1813,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	nr_io_queues = vecs;
 
 	result = queue_request_irq(dev, dev->queues[0], "nvme admin");
-	if (result)
+	if (result) {
+		dev->queues[0]->q_suspended = 1;
 		goto free_queues;
+	}
 
 	/* Free previously allocated queues that are no longer usable */
 	spin_lock(&dev_list_lock);
-- 
1.7.0.4
