* [RFC PATCH 0/2] Re-work CMB and add WDS support.
@ 2018-07-19 23:06 Scott Bauer
  2018-07-19 23:06 ` [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool Scott Bauer
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Scott Bauer @ 2018-07-19 23:06 UTC


The first patch puts the CMB into a generic pool; from there we allocate
the submission queues. In the future we can allocate SGLs and PRPs from
the pool as well.

The second patch unfortunately bounces (memcpys) the data from host to CMB.
I originally tried to map the CMB into userspace via a custom nvme mmap
handler, but I'm dumb and could never get gup for direct io working correctly.
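
For the curious, that route would look roughly like the classic BAR-mmap
pattern below (a hypothetical sketch, not part of this series; the nvme_dev
plumbing through file->private_data is assumed). The catch is that such a
mapping is VM_IO | VM_PFNMAP, which get_user_pages() refuses, so direct I/O
on top of it has no pages to pin:

    /* Hypothetical handler: expose the CMB BAR to userspace. */
    static int nvme_cmb_mmap(struct file *file, struct vm_area_struct *vma)
    {
            struct nvme_dev *dev = file->private_data;
            struct pci_dev *pdev = to_pci_dev(dev->dev);
            int bar = NVME_CMB_BIR(dev->cmbloc);

            /* CMB offset within the BAR ignored for brevity. Creates a
             * VM_IO | VM_PFNMAP mapping; gup rejects those. */
            return vm_iomap_memory(vma, pci_resource_start(pdev, bar),
                                   dev->cmb_size);
    }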


* [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool.
  2018-07-19 23:06 [RFC PATCH 0/2] Re-work CMB and add WDS support Scott Bauer
@ 2018-07-19 23:06 ` Scott Bauer
  2018-07-20 14:49   ` Christoph Hellwig
  2018-07-19 23:06 ` [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory Scott Bauer
  2018-07-20 14:46 ` [RFC PATCH 0/2] Re-work CMB and add WDS support Christoph Hellwig
  2 siblings, 1 reply; 10+ messages in thread
From: Scott Bauer @ 2018-07-19 23:06 UTC


This switches the CMB allocation from straight offset calculations
to allocating from a pool.

Signed-off-by: Scott Bauer <scott.bauer@intel.com>
---
 drivers/nvme/host/pci.c | 58 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8dcae11bbf3a..b8c81be4a985 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/sed-opal.h>
+#include <linux/genalloc.h>
 
 #include "nvme.h"
 
@@ -100,7 +101,7 @@ struct nvme_dev {
 	struct mutex shutdown_lock;
 	bool subsystem;
 	void __iomem *cmb;
-	pci_bus_addr_t cmb_bus_addr;
+	struct gen_pool *cmb_pool;
 	u64 cmb_size;
 	u32 cmbsz;
 	u32 cmbloc;
@@ -1300,6 +1301,12 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
 	if (nvmeq->sq_cmds)
 		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
 					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	if (nvmeq->sq_cmds_io) {
+		gen_pool_free(nvmeq->dev->cmb_pool,(unsigned long)nvmeq->sq_cmds_io,
+			      roundup(SQ_SIZE(nvmeq->q_depth),
+				      nvmeq->dev->ctrl.page_size));
+		nvmeq->sq_cmds_io = NULL;
+	}
 }
 
 static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1467,14 +1474,19 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 {
 	struct nvme_dev *dev = nvmeq->dev;
-	int result;
+	int result = -ENOMEM;
 	s16 vector;
+	unsigned size;
 
+	size = roundup(SQ_SIZE(nvmeq->q_depth), dev->ctrl.page_size);
 	if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
+		nvmeq->sq_cmds_io = (void *) gen_pool_alloc(dev->cmb_pool, size);
+		if (!nvmeq->sq_cmds_io)
+			return result;
+
+		nvmeq->sq_dma_addr =
+			gen_pool_virt_to_phys(dev->cmb_pool,
+					      (unsigned long)nvmeq->sq_cmds_io);
 	}
 
 	/*
@@ -1710,8 +1722,7 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	u64 size, offset;
 	resource_size_t bar_size;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int bar;
-
+	int bar, ret;
 	dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
 	if (!dev->cmbsz)
 		return;
@@ -1733,24 +1744,51 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	 * for example, due to being behind a bridge. Reduce the CMB to
 	 * the reported size of the BAR
 	 */
+
 	if (size > bar_size - offset)
 		size = bar_size - offset;
 
+	dev->cmb_pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+	if (!dev->cmb_pool)
+		return;
+
 	dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
 	if (!dev->cmb)
-		return;
-	dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
+		goto unwind_pool;
+
+	ret = gen_pool_add_virt(dev->cmb_pool, (unsigned long) dev->cmb,
+				pci_bus_address(pdev, bar) + offset,
+				size, dev_to_node(&pdev->dev));
+
+	if (ret) {
+		pr_err("%s: failed to add our virt to the gen pool\n", __func__);
+		goto unwind_pool_cmb;
+	}
+
 	dev->cmb_size = size;
 
 	if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 				    &dev_attr_cmb.attr, NULL))
 		dev_warn(dev->ctrl.device,
 			 "failed to add sysfs attribute for CMB\n");
+
+	return;
+
+ unwind_pool_cmb:
+	iounmap(dev->cmb);
+	dev->cmb = NULL;
+ unwind_pool:
+	gen_pool_destroy(dev->cmb_pool);
+	dev->cmb_pool = NULL;
 }
 
 static inline void nvme_release_cmb(struct nvme_dev *dev)
 {
 	if (dev->cmb) {
+		if (use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
+			nvme_free_queues(dev, 1);
+		gen_pool_destroy(dev->cmb_pool);
+		dev->cmb_pool = NULL;
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
 		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
-- 
2.17.1


* [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory
  2018-07-19 23:06 [RFC PATCH 0/2] Re-work CMB and add WDS support Scott Bauer
  2018-07-19 23:06 ` [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool Scott Bauer
@ 2018-07-19 23:06 ` Scott Bauer
  2018-07-20 14:23   ` Keith Busch
  2018-07-20 14:46 ` [RFC PATCH 0/2] Re-work CMB and add WDS support Christoph Hellwig
  2 siblings, 1 reply; 10+ messages in thread
From: Scott Bauer @ 2018-07-19 23:06 UTC


Signed-off-by: Scott Bauer <scott.bauer@intel.com>
---
 drivers/nvme/host/pci.c | 91 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b8c81be4a985..2f1dd7e582ac 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -53,6 +53,10 @@ static bool use_cmb_sqes = true;
 module_param(use_cmb_sqes, bool, 0444);
 MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
 
+static bool use_cmb_wds = false;
+module_param(use_cmb_wds, bool, 0444);
+MODULE_PARM_DESC(use_cmb_wds, "use controller's memory buffer for I/O data");
+
 static unsigned int max_host_mem_size_mb = 128;
 module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
@@ -193,6 +197,7 @@ struct nvme_iod {
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int nents;		/* Used in scatterlist */
 	int length;		/* Of data, in bytes */
+	int cmb_data;           /* Data was copied into CMB for WDS */
 	dma_addr_t first_dma;
 	struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
 	struct scatterlist *sg;
@@ -504,6 +509,30 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	return BLK_STS_OK;
 }
 
+static void nvme_unmap_sg_cmb(struct nvme_dev *dev, struct nvme_iod *iod,
+			      int num_mapped)
+{
+	struct scatterlist *s;
+	int j;
+
+	for_each_sg(iod->sg, s, iod->nents, j) {
+		if (j == num_mapped)
+			break;
+		gen_pool_free(dev->cmb_pool, s->page_link, s->length);
+		s->dma_address = 0;
+		s->page_link = 0;
+		sg_dma_len(s) = 0;
+	}
+	iod->cmb_data = 0;
+}
+
+static void nvme_clean_cmb_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+{
+	if (iod->cmb_data)
+		nvme_unmap_sg_cmb(dev, iod, iod->nents);
+	return;
+}
+
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -534,8 +563,10 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 		dma_addr = next_dma_addr;
 	}
 
-	if (iod->sg != iod->inline_sg)
+	if (iod->sg != iod->inline_sg) {
+		nvme_clean_cmb_iod(dev, iod);
 		mempool_free(iod->sg, dev->iod_mempool);
+	}
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -641,6 +672,12 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
 		goto done;
 	}
 
+	if (iod->cmb_data)
+		dma_addr = gen_pool_virt_to_phys(dev->cmb_pool,
+						 sg_dma_address(sg));
+	else
+		dma_addr = sg_dma_address(sg);
+
 	dma_len -= (page_size - offset);
 	if (dma_len) {
 		dma_addr += (page_size - offset);
@@ -792,8 +829,37 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }
 
+static int nvme_copy_to_cmb(struct nvme_dev *dev, struct nvme_iod *iod)
+{
+	struct scatterlist *s;
+	void *data_cmb;
+	int i;
+
+	iod->cmb_data = 1;
+	for_each_sg(iod->sg, s, iod->nents, i) {
+		data_cmb = (void *) gen_pool_alloc(dev->cmb_pool, s->length);
+		if (!data_cmb) {
+			pr_err("%s: failed to alloc from pool\n", __func__);
+			goto unwind;
+		}
+
+		memcpy_toio(data_cmb, page_address(sg_page(s)), s->length);
+
+		s->dma_address = gen_pool_virt_to_phys(dev->cmb_pool,
+						       (unsigned long) data_cmb);
+		sg_dma_len(s) = s->length;
+		/* We do not need the sg_page page link anymore so we'll steal it. */
+		s->page_link = (unsigned long) data_cmb;
+	}
+	return i;
+
+ unwind:
+	nvme_unmap_sg_cmb(dev, iod, i);
+	return 0;
+}
+
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
-		struct nvme_command *cmnd)
+				  struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct request_queue *q = req->q;
@@ -808,8 +874,15 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		goto out;
 
 	ret = BLK_STS_RESOURCE;
-	nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
-			DMA_ATTR_NO_WARN);
+
+	if (dma_dir == DMA_TO_DEVICE && use_cmb_wds
+	    && dev->cmb_pool && dev->cmbsz & NVME_CMBSZ_WDS &&
+	    iod->nvmeq->qid)
+		nr_mapped = nvme_copy_to_cmb(dev, iod);
+	else
+		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
+					     dma_dir, DMA_ATTR_NO_WARN);
+
 	if (!nr_mapped)
 		goto out;
 
@@ -842,7 +915,10 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	return BLK_STS_OK;
 
 out_unmap:
-	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+	if (iod->cmb_data)
+		nvme_unmap_sg_cmb(dev, iod, iod->nents);
+	else
+		dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 out:
 	return ret;
 }
@@ -853,7 +929,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 	enum dma_data_direction dma_dir = rq_data_dir(req) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
 
-	if (iod->nents) {
+	if (iod->nents && !iod->cmb_data) {
 		dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 		if (blk_integrity_rq(req)) {
 			if (req_op(req) == REQ_OP_READ)
@@ -1210,6 +1286,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 */
 	if (nvme_should_reset(dev, csts)) {
 		nvme_warn_reset(dev, csts);
+		nvme_clean_cmb_iod(dev, iod);
 		nvme_dev_disable(dev, false);
 		nvme_reset_ctrl(&dev->ctrl);
 		return BLK_EH_DONE;
@@ -1237,6 +1314,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
+		nvme_clean_cmb_iod(dev, iod);
 		nvme_dev_disable(dev, false);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
@@ -1253,6 +1331,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, reset controller\n",
 			 req->tag, nvmeq->qid);
+		nvme_clean_cmb_iod(dev, iod);
 		nvme_dev_disable(dev, false);
 		nvme_reset_ctrl(&dev->ctrl);
 
-- 
2.17.1


* [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory
  2018-07-19 23:06 ` [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory Scott Bauer
@ 2018-07-20 14:23   ` Keith Busch
  2018-07-20 14:49     ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: Keith Busch @ 2018-07-20 14:23 UTC


On Thu, Jul 19, 2018 at 05:06:28PM -0600, Scott Bauer wrote:
> Signed-off-by: Scott Bauer <scott.bauer@intel.com>

What does this gain us? I don't think this would do anything except
increase the CPU utilization. If this actually buys additional
performance, could you include some relative data and workload
characteristics in the changelog?

> +static int nvme_copy_to_cmb(struct nvme_dev *dev, struct nvme_iod *iod)
> +{
> +	struct scatterlist *s;
> +	void *data_cmb;
> +	int i;
> +
> +	iod->cmb_data = 1;
> +	for_each_sg(iod->sg, s, iod->nents, i) {
> +		data_cmb = (void *) gen_pool_alloc(dev->cmb_pool, s->length);
> +		if (!data_cmb) {
> +			pr_err("%s: failed to alloc from pool\n", __func__);
> +			goto unwind;
> +		}
> +
> +		memcpy_toio(data_cmb, page_address(sg_page(s)), s->length);
> +
> +		s->dma_address = gen_pool_virt_to_phys(dev->cmb_pool,
> +						       (unsigned long) data_cmb);
> +		sg_dma_len(s) = s->length;
> +		/* We do not need the sg_page page link anymore so we'll steal it. */
> +		s->page_link = (unsigned long) data_cmb;
> +	}
> +	return i;
> +
> + unwind:
> +	nvme_unmap_sg_cmb(dev, iod, i);
> +	return 0;
> +}
> +
>  static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
> -		struct nvme_command *cmnd)
> +				  struct nvme_command *cmnd)
>  {
>  	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
>  	struct request_queue *q = req->q;
> @@ -808,8 +874,15 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
>  		goto out;
>  
>  	ret = BLK_STS_RESOURCE;
> -	nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
> -			DMA_ATTR_NO_WARN);
> +
> +	if (dma_dir == DMA_TO_DEVICE && use_cmb_wds
> +	    && dev->cmb_pool && dev->cmbsz & NVME_CMBSZ_WDS &&
> +	    iod->nvmeq->qid)
> +		nr_mapped = nvme_copy_to_cmb(dev, iod);
> +	else
> +		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
> +					     dma_dir, DMA_ATTR_NO_WARN);
> +
>  	if (!nr_mapped)
>  		goto out;

A failure to allocate the CMB resource should probably fall back to the
non-CMB resource.
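
A sketch of that fallback against the hunk above (untested; note that
nvme_unmap_sg_cmb() as posted clobbers the sg page links on unwind, so it
would first have to restore them before the same scatterlist could be
handed to dma_map_sg_attrs()):

    nr_mapped = 0;
    if (dma_dir == DMA_TO_DEVICE && use_cmb_wds && dev->cmb_pool &&
        (dev->cmbsz & NVME_CMBSZ_WDS) && iod->nvmeq->qid)
            nr_mapped = nvme_copy_to_cmb(dev, iod);

    /* CMB pool exhausted or not eligible: fall back to host memory. */
    if (!nr_mapped)
            nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
                                         dma_dir, DMA_ATTR_NO_WARN);
    if (!nr_mapped)
            goto out;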


* [RFC PATCH 0/2] Re-work CMB and add WDS support.
  2018-07-19 23:06 [RFC PATCH 0/2] Re-work CMB and add WDS support Scott Bauer
  2018-07-19 23:06 ` [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool Scott Bauer
  2018-07-19 23:06 ` [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory Scott Bauer
@ 2018-07-20 14:46 ` Christoph Hellwig
  2018-07-20 14:50   ` Scott Bauer
  2 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2018-07-20 14:46 UTC


On Thu, Jul 19, 2018 at 05:06:26PM -0600, Scott Bauer wrote:
> The second patch unfortunately bounces (memcpys) the data from host to CMB.
> I originally tried to map the CMB into userspace via a custom nvme mmap
> handler, but I'm dumb and could never get gup for direct io working correctly.

Why?


* [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool.
  2018-07-19 23:06 ` [RFC PATCH 1/2] nvme: pci: Move CMB allocation into a pool Scott Bauer
@ 2018-07-20 14:49   ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2018-07-20 14:49 UTC


On Thu, Jul 19, 2018 at 05:06:27PM -0600, Scott Bauer wrote:
> This switches the CMB allocation from straight offset calculations
> to allocating from a pool.

Please don't change this code now.  Logan has been working for a while
to get PCIe P2P support upstream which will move this to a generic
pool:

https://github.com/sbates130272/linux-p2pmem/commits/pci-p2p-v4
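
For comparison, the generic layer in that tree exposes the same carve-up
through pci_p2pdma helpers; the core calls look roughly like the following
(names taken from that work, and they may still change before merging):

    /* Register (part of) a BAR as p2p memory, then allocate from it. */
    pci_p2pdma_add_resource(pdev, bar, size, offset);

    sqes = pci_alloc_p2pmem(pdev, sq_bytes);
    sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, sqes);

    pci_free_p2pmem(pdev, sqes, sq_bytes);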



* [RFC PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory
  2018-07-20 14:23   ` Keith Busch
@ 2018-07-20 14:49     ` Christoph Hellwig
  2018-07-20 14:53       ` Scott Bauer
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2018-07-20 14:49 UTC


On Fri, Jul 20, 2018 at 08:23:43AM -0600, Keith Busch wrote:
> On Thu, Jul 19, 2018 at 05:06:28PM -0600, Scott Bauer wrote:
> > Signed-off-by: Scott Bauer <scott.bauer@intel.com>
> 
> What does this gain us? I don't think this would do anything except
> increase the CPU utilization. If this actually buys additional
> performance, could you include some relative data and workload
> characteristics in the changelog?

Yes, as-is this seems rather counterproductive.


* [RFC PATCH 0/2] Re-work CMB and add WDS support.
  2018-07-20 14:46 ` [RFC PATCH 0/2] Re-work CMB and add WDS support Christoph Hellwig
@ 2018-07-20 14:50   ` Scott Bauer
  2018-07-20 16:01     ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: Scott Bauer @ 2018-07-20 14:50 UTC


On Fri, Jul 20, 2018 at 07:46:28AM -0700, Christoph Hellwig wrote:
> On Thu, Jul 19, 2018 at 05:06:26PM -0600, Scott Bauer wrote:
> > The second patch unfortunately bounces (memcpys) the data from host to CMB.
> > I originally tried to map the CMB into userspace via a custom nvme mmap
> > handler, but I'm dumb and could never get gup for direct io working correctly.
> 
> Why?

What part are you asking why about? 


* [RCF PATCH 2/2] nvme-pci: Bounce data from Host memory to CMB Memory
  2018-07-20 14:49     ` Christoph Hellwig
@ 2018-07-20 14:53       ` Scott Bauer
  0 siblings, 0 replies; 10+ messages in thread
From: Scott Bauer @ 2018-07-20 14:53 UTC


On Fri, Jul 20, 2018 at 07:49:49AM -0700, Christoph Hellwig wrote:
> On Fri, Jul 20, 2018 at 08:23:43AM -0600, Keith Busch wrote:
> > On Thu, Jul 19, 2018 at 05:06:28PM -0600, Scott Bauer wrote:
> > > Signed-off-by: Scott Bauer <scott.bauer@intel.com>
> > 
> > What does this gain us? I don't think this would do anything except
> > increase the CPU utilization. If this actually buys additional
> > performance, could you include some relative data and workload
> > characteristics in the changelog?
> 
> Yes, as-is this seems rather counter productive..

Unfortunately I still don't have a drive to do perf testing on. The
only thing I can do is quickly hack this into QEMU, but I suspect that's
far too different to get us any relevant numbers.
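
For what it's worth, QEMU's emulated NVMe device can already expose a CMB
(assuming its cmb_size_mb device property), e.g.:

    -drive file=nvme.img,if=none,id=nvm \
    -device nvme,drive=nvm,serial=deadbeef,cmb_size_mb=64

though, as noted, the latency and bandwidth of an emulated CMB would not
resemble real hardware.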


* [RFC PATCH 0/2] Re-work CMB and add WDS support.
  2018-07-20 14:50   ` Scott Bauer
@ 2018-07-20 16:01     ` Christoph Hellwig
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2018-07-20 16:01 UTC


On Fri, Jul 20, 2018 at 08:50:10AM -0600, Scott Bauer wrote:
> On Fri, Jul 20, 2018 at 07:46:28AM -0700, Christoph Hellwig wrote:
> > On Thu, Jul 19, 2018 at 05:06:26PM -0600, Scott Bauer wrote:
> > > The second patch unfortunately bounces (memcpys) the data from host to CMB.
> > > I originally tried to map the CMB into userspace via a custom nvme mmap
> > > handler, but I'm dumb and could never get gup for direct io working correctly.
> > 
> > Why?
> 
> What part are you asking why about? 

All of the above.  What is the motivation for this series?

