* [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
From: Baolin Wang @ 2020-06-28 10:34 UTC (permalink / raw)
  To: kbusch, axboe, hch, sagi
  Cc: baolin.wang, baolin.wang7, linux-nvme, linux-kernel

Move the sg table allocation and free into init_request() and
exit_request(), instead of allocating the sg table when queuing requests,
which can benefit IO performance.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 drivers/nvme/host/pci.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b1d18f0..cf7c997 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -410,9 +410,25 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
 	iod->nvmeq = nvmeq;
 
 	nvme_req(req)->ctrl = &dev->ctrl;
+
+	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+	if (!iod->sg)
+		return -ENOMEM;
+
+	sg_init_table(iod->sg, NVME_MAX_SEGS);
 	return 0;
 }
 
+static void nvme_exit_request(struct blk_mq_tag_set *set, struct request *req,
+			      unsigned int hctx_idx)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_dev *dev = set->driver_data;
+
+	mempool_free(iod->sg, dev->iod_mempool);
+	iod->sg = NULL;
+}
+
 static int queue_irq_offset(struct nvme_dev *dev)
 {
 	/* if we have more than 1 vec, admin queue offsets us by 1 */
@@ -557,8 +573,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 		dma_pool_free(dev->prp_page_pool, addr, dma_addr);
 		dma_addr = next_dma_addr;
 	}
-
-	mempool_free(iod->sg, dev->iod_mempool);
 }
 
 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -808,10 +822,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	}
 
 	iod->dma_len = 0;
-	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
-	if (!iod->sg)
-		return BLK_STS_RESOURCE;
-	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
@@ -1557,6 +1567,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	.complete	= nvme_pci_complete_rq,
 	.init_hctx	= nvme_admin_init_hctx,
 	.init_request	= nvme_init_request,
+	.exit_request	= nvme_exit_request,
 	.timeout	= nvme_timeout,
 };
 
@@ -1566,6 +1577,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	.commit_rqs	= nvme_commit_rqs,
 	.init_hctx	= nvme_init_hctx,
 	.init_request	= nvme_init_request,
+	.exit_request	= nvme_exit_request,
 	.map_queues	= nvme_pci_map_queues,
 	.timeout	= nvme_timeout,
 	.poll		= nvme_poll,
-- 
1.8.3.1


* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
From: Chaitanya Kulkarni @ 2020-06-28 21:55 UTC (permalink / raw)
  To: Baolin Wang, kbusch, axboe, hch, sagi
  Cc: baolin.wang7, linux-nvme, linux-kernel

On 6/28/20 3:44 AM, Baolin Wang wrote:
> Move the sg table allocation and free into init_request() and
> exit_request(), instead of allocating the sg table when queuing requests,
> which can benefit IO performance.
> 
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>

The call to sg_init_table() in nvme_map_data() uses blk_rq_nr_phys_segments();
with this patch we are blindly allocating the SG table with NVME_MAX_SEGS
entries. Without any performance numbers it is hard to measure the impact.

Can you share performance numbers?

I'm particularly interested in IOPS, BW, CPU usage, submission latency,
and completion latency, plus perf numbers for the respective functions,
to determine the overall impact.




* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
From: Keith Busch @ 2020-06-29  1:31 UTC (permalink / raw)
  To: Baolin Wang; +Cc: axboe, hch, sagi, baolin.wang7, linux-nvme, linux-kernel

On Sun, Jun 28, 2020 at 06:34:46PM +0800, Baolin Wang wrote:
> Move the sg table allocation and free into init_request() and
> exit_request(), instead of allocating the sg table when queuing requests,
> which can benefit IO performance.

If you want to pre-allocate something per-request, you can add the size
to the tagset's cmd_size.

But this is adding almost 4k per request. Considering how many requests
we try to allocate, that's a bit too large to count on being available
or to sequester for this driver.
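
A minimal sketch of the cmd_size approach described above (illustrative
only; the PDU layout and the NVME_MAX_SEGS sizing are assumptions, not
the actual driver code): blk-mq allocates cmd_size bytes of per-request
PDU space up front, so the SG table can live in the PDU tail with no
mempool involved at all:

	/* Tag set setup: reserve iod + SG table in every request PDU.
	 * The extra space is NVME_MAX_SEGS * sizeof(struct scatterlist),
	 * which is what makes it "almost 4k per request" on 64-bit. */
	dev->tagset.cmd_size = sizeof(struct nvme_iod) +
			       sizeof(struct scatterlist) * NVME_MAX_SEGS;

	/* init_request() then just points iod->sg at the PDU tail. */
	static int nvme_init_request(struct blk_mq_tag_set *set,
				     struct request *req,
				     unsigned int hctx_idx,
				     unsigned int numa_node)
	{
		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

		iod->sg = (struct scatterlist *)(iod + 1);
		sg_init_table(iod->sg, NVME_MAX_SEGS);
		return 0;
	}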


* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
From: Baolin Wang @ 2020-06-29 12:29 UTC (permalink / raw)
  To: Keith Busch; +Cc: sagi, linux-kernel, linux-nvme, axboe, baolin.wang7, hch

On Sun, Jun 28, 2020 at 06:31:43PM -0700, Keith Busch wrote:
> On Sun, Jun 28, 2020 at 06:34:46PM +0800, Baolin Wang wrote:
> > Move the sg table allocation and free into init_request() and
> > exit_request(), instead of allocating the sg table when queuing requests,
> > which can benefit IO performance.
> 
> If you want to pre-allocate something per-request, you can add the size
> to the tagset's cmd_size.
> 
> But this is adding almost 4k per request. Considering how many requests
> we try to allocate, that's a bit too large to count on being available
> or to sequester for this driver.

I saw that other block drivers (mmc, scsi) allocate the sg table in
init_request() instead of when queuing requests, to benefit performance,
but I was not sure why nvme did not do the same. OK, now I think I know
the reason, thanks.
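
For reference, a minimal sketch of the pattern those drivers rely on to
avoid the hot-path allocation for typical requests: a small inline
scatterlist chunk lives in the per-request PDU, and further entries are
chained only for large requests. The structure and inline count below
are illustrative assumptions, not the actual scsi/mmc code:

	#include <linux/scatterlist.h>

	#define MY_INLINE_SG_CNT 2	/* assumed small inline chunk */

	struct my_iod {
		struct sg_table sgt;
		struct scatterlist inline_sg[MY_INLINE_SG_CNT];
	};

	/* Common small requests fit the inline chunk and need no
	 * allocation at queue time; larger ones chain extra entries. */
	static int my_map_data(struct my_iod *iod, int nr_segs)
	{
		return sg_alloc_table_chained(&iod->sgt, nr_segs,
					      iod->inline_sg,
					      MY_INLINE_SG_CNT);
	}

	static void my_unmap_data(struct my_iod *iod)
	{
		sg_free_table_chained(&iod->sgt, MY_INLINE_SG_CNT);
	}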



* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
From: Baolin Wang @ 2020-06-29 12:49 UTC (permalink / raw)
  To: Chaitanya Kulkarni
  Cc: sagi, linux-kernel, linux-nvme, axboe, baolin.wang7, kbusch, hch

On Sun, Jun 28, 2020 at 09:55:12PM +0000, Chaitanya Kulkarni wrote:
> On 6/28/20 3:44 AM, Baolin Wang wrote:
> > Move the sg table allocation and free into init_request() and
> > exit_request(), instead of allocating the sg table when queuing requests,
> > which can benefit IO performance.
> > 
> > Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> 
> The call to sg_init_table() in nvme_map_data() uses blk_rq_nr_phys_segments();
> with this patch we are blindly allocating the SG table with NVME_MAX_SEGS
> entries. Without any performance numbers it is hard to measure the impact.

Not true: the original code will also allocate an sg table sized for
NVME_MAX_SEGS if the request contains multiple segments. I just moved the
sg table allocation so it is pre-allocated for each request, instead of
being allocated when queuing requests. Obviously that saves some
memory-allocation time when the request contains several segments.

> 
> Can you share performance numbers?
> 
> I'm particularly interested in IOPS, BW, CPU usage, submission latency,
> and completion latency, plus perf numbers for the respective functions,
> to determine the overall impact.

From my previous tests I did not see obvious improvements; I think my
test case always creates one segment for each request even though I set
bs > 4K. I can try to create a special case to test multiple segments.
But as Keith commented, he did not like this patch, so...

