* [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
@ 2020-06-28 10:34 ` Baolin Wang
0 siblings, 0 replies; 8+ messages in thread
From: Baolin Wang @ 2020-06-28 10:34 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi
Cc: baolin.wang, baolin.wang7, linux-nvme, linux-kernel
Move the sg table allocation and free into the init_request() and
exit_request(), instead of allocating sg table when queuing requests,
which can benefit the IO performance.
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
drivers/nvme/host/pci.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b1d18f0..cf7c997 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -410,9 +410,25 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
iod->nvmeq = nvmeq;
nvme_req(req)->ctrl = &dev->ctrl;
+
+ iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+ if (!iod->sg)
+ return -ENOMEM;
+
+ sg_init_table(iod->sg, NVME_MAX_SEGS);
return 0;
}
+static void nvme_exit_request(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = set->driver_data;
+
+ mempool_free(iod->sg, dev->iod_mempool);
+ iod->sg = NULL;
+}
+
static int queue_irq_offset(struct nvme_dev *dev)
{
/* if we have more than 1 vec, admin queue offsets us by 1 */
@@ -557,8 +573,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
dma_pool_free(dev->prp_page_pool, addr, dma_addr);
dma_addr = next_dma_addr;
}
-
- mempool_free(iod->sg, dev->iod_mempool);
}
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -808,10 +822,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
}
iod->dma_len = 0;
- iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
- if (!iod->sg)
- return BLK_STS_RESOURCE;
- sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
if (!iod->nents)
goto out;
@@ -1557,6 +1567,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.init_request = nvme_init_request,
+ .exit_request = nvme_exit_request,
.timeout = nvme_timeout,
};
@@ -1566,6 +1577,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
.commit_rqs = nvme_commit_rqs,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
+ .exit_request = nvme_exit_request,
.map_queues = nvme_pci_map_queues,
.timeout = nvme_timeout,
.poll = nvme_poll,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
@ 2020-06-28 10:34 ` Baolin Wang
0 siblings, 0 replies; 8+ messages in thread
From: Baolin Wang @ 2020-06-28 10:34 UTC (permalink / raw)
To: kbusch, axboe, hch, sagi
Cc: baolin.wang7, linux-nvme, baolin.wang, linux-kernel
Move the sg table allocation and free into the init_request() and
exit_request(), instead of allocating sg table when queuing requests,
which can benefit the IO performance.
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
drivers/nvme/host/pci.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b1d18f0..cf7c997 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -410,9 +410,25 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
iod->nvmeq = nvmeq;
nvme_req(req)->ctrl = &dev->ctrl;
+
+ iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+ if (!iod->sg)
+ return -ENOMEM;
+
+ sg_init_table(iod->sg, NVME_MAX_SEGS);
return 0;
}
+static void nvme_exit_request(struct blk_mq_tag_set *set, struct request *req,
+ unsigned int hctx_idx)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = set->driver_data;
+
+ mempool_free(iod->sg, dev->iod_mempool);
+ iod->sg = NULL;
+}
+
static int queue_irq_offset(struct nvme_dev *dev)
{
/* if we have more than 1 vec, admin queue offsets us by 1 */
@@ -557,8 +573,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
dma_pool_free(dev->prp_page_pool, addr, dma_addr);
dma_addr = next_dma_addr;
}
-
- mempool_free(iod->sg, dev->iod_mempool);
}
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -808,10 +822,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
}
iod->dma_len = 0;
- iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
- if (!iod->sg)
- return BLK_STS_RESOURCE;
- sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
if (!iod->nents)
goto out;
@@ -1557,6 +1567,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
.complete = nvme_pci_complete_rq,
.init_hctx = nvme_admin_init_hctx,
.init_request = nvme_init_request,
+ .exit_request = nvme_exit_request,
.timeout = nvme_timeout,
};
@@ -1566,6 +1577,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
.commit_rqs = nvme_commit_rqs,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
+ .exit_request = nvme_exit_request,
.map_queues = nvme_pci_map_queues,
.timeout = nvme_timeout,
.poll = nvme_poll,
--
1.8.3.1
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
2020-06-28 10:34 ` Baolin Wang
@ 2020-06-28 21:55 ` Chaitanya Kulkarni
-1 siblings, 0 replies; 8+ messages in thread
From: Chaitanya Kulkarni @ 2020-06-28 21:55 UTC (permalink / raw)
To: Baolin Wang, kbusch, axboe, hch, sagi
Cc: baolin.wang7, linux-nvme, linux-kernel
On 6/28/20 3:44 AM, Baolin Wang wrote:
> Move the sg table allocation and free into the init_request() and
> exit_request(), instead of allocating sg table when queuing requests,
> which can benefit the IO performance.
>
> Signed-off-by: Baolin Wang<baolin.wang@linux.alibaba.com>
The call to sg_init_table() uses blk_rq_nr_phys_segments in
nvme_map_data(); with this patch we are blindly allocating an SG table with
NVME_MAX_SEGS. Without any performance numbers it is hard to measure the
impact.
Can you share performance numbers ?
I'm particularly interested in IOPS/BW/CPU usage/submission latency
and completion latency, and perf numbers for the respective functions,
to determine the overall impact.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
@ 2020-06-28 21:55 ` Chaitanya Kulkarni
0 siblings, 0 replies; 8+ messages in thread
From: Chaitanya Kulkarni @ 2020-06-28 21:55 UTC (permalink / raw)
To: Baolin Wang, kbusch, axboe, hch, sagi
Cc: baolin.wang7, linux-kernel, linux-nvme
On 6/28/20 3:44 AM, Baolin Wang wrote:
> Move the sg table allocation and free into the init_request() and
> exit_request(), instead of allocating sg table when queuing requests,
> which can benefit the IO performance.
>
> Signed-off-by: Baolin Wang<baolin.wang@linux.alibaba.com>
The call to sg_init_table() uses blk_rq_nr_phys_segments in
nvme_map_data(); with this patch we are blindly allocating an SG table with
NVME_MAX_SEGS. Without any performance numbers it is hard to measure the
impact.
Can you share performance numbers ?
I'm particularly interested in IOPS/BW/CPU usage/submission latency
and completion latency, and perf numbers for the respective functions,
to determine the overall impact.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
2020-06-28 10:34 ` Baolin Wang
@ 2020-06-29 1:31 ` Keith Busch
-1 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2020-06-29 1:31 UTC (permalink / raw)
To: Baolin Wang; +Cc: axboe, hch, sagi, baolin.wang7, linux-nvme, linux-kernel
On Sun, Jun 28, 2020 at 06:34:46PM +0800, Baolin Wang wrote:
> Move the sg table allocation and free into the init_request() and
> exit_request(), instead of allocating sg table when queuing requests,
> which can benefit the IO performance.
If you want to pre-allocate something per-request, you can add the size
to the tagset's cmd_size.
But this is adding almost 4k per request. Considering how many requests
we try to allocate, that's a bit too large to count on being available
or to sequester for this driver.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
@ 2020-06-29 1:31 ` Keith Busch
0 siblings, 0 replies; 8+ messages in thread
From: Keith Busch @ 2020-06-29 1:31 UTC (permalink / raw)
To: Baolin Wang; +Cc: sagi, linux-kernel, linux-nvme, axboe, baolin.wang7, hch
On Sun, Jun 28, 2020 at 06:34:46PM +0800, Baolin Wang wrote:
> Move the sg table allocation and free into the init_request() and
> exit_request(), instead of allocating sg table when queuing requests,
> which can benefit the IO performance.
If you want to pre-allocate something per-request, you can add the size
to the tagset's cmd_size.
But this is adding almost 4k per request. Considering how many requests
we try to allocate, that's a bit too large to count on being available
or to sequester for this driver.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
2020-06-29 1:31 ` Keith Busch
(?)
@ 2020-06-29 12:29 ` Baolin Wang
-1 siblings, 0 replies; 8+ messages in thread
From: Baolin Wang @ 2020-06-29 12:29 UTC (permalink / raw)
To: Keith Busch; +Cc: sagi, linux-kernel, linux-nvme, axboe, baolin.wang7, hch
On Sun, Jun 28, 2020 at 06:31:43PM -0700, Keith Busch wrote:
> On Sun, Jun 28, 2020 at 06:34:46PM +0800, Baolin Wang wrote:
> > Move the sg table allocation and free into the init_request() and
> > exit_request(), instead of allocating sg table when queuing requests,
> > which can benefit the IO performance.
>
> If you want to pre-allocate something per-request, you can add the size
> to the tagset's cmd_size.
>
> But this is adding almost 4k per request. Considering how many requests
> we try to allocate, that's a bit too large to count on being available
> or sequestor for this driver.
I saw other block drivers (mmc, scsi) had allocated the sg table in
init_request() instead of allocating them when queuing requests to benefit
performance, but I am not sure why the nvme did not do like this. OK,
now I think I know the reason, thanks.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request
2020-06-28 21:55 ` Chaitanya Kulkarni
(?)
@ 2020-06-29 12:49 ` Baolin Wang
-1 siblings, 0 replies; 8+ messages in thread
From: Baolin Wang @ 2020-06-29 12:49 UTC (permalink / raw)
To: Chaitanya Kulkarni
Cc: sagi, linux-kernel, linux-nvme, axboe, baolin.wang7, kbusch, hch
On Sun, Jun 28, 2020 at 09:55:12PM +0000, Chaitanya Kulkarni wrote:
> On 6/28/20 3:44 AM, Baolin Wang wrote:
> > Move the sg table allocation and free into the init_request() and
> > exit_request(), instead of allocating sg table when queuing requests,
> > which can benefit the IO performance.
> >
> > Signed-off-by: Baolin Wang<baolin.wang@linux.alibaba.com>
>
> The call to sg_init_table() uses blk_rq_nr_phys_segments in
> nvme_map_data() with this patch we are blindly allocating SG table with
> NVME_MAX_SEGS, without any performance numbers it is hard to measure the
> impact.
Not true: the original code will also allocate the sg table with NVME_MAX_SEGS
if the request contains multiple segments. I just moved the sg table allocation
to pre-allocation for each request, instead of allocating the sg table when
queuing requests. Obviously it will save some memory allocation time when the
request contains several segments.
>
> Can you share performance numbers ?
>
> I'm particularly interested in for IOPS/BW/CPU/USAGE/Submission latency
> and completion latency and perf numbers for the respective function in
> to determine the overall impact.
From my previous tests, I did not see obvious improvements; I think my
test case always creates one segment for each request even though I set bs >
4K. I can try to create a special case to test multiple segments.
But as Keith commented, he did not like this patch, so...
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2020-06-29 12:50 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-28 10:34 [RFC PATCH] nvme-pci: Move the sg table allocation/free into init/exit_request Baolin Wang
2020-06-28 10:34 ` Baolin Wang
2020-06-28 21:55 ` Chaitanya Kulkarni
2020-06-28 21:55 ` Chaitanya Kulkarni
2020-06-29 12:49 ` Baolin Wang
2020-06-29 1:31 ` Keith Busch
2020-06-29 1:31 ` Keith Busch
2020-06-29 12:29 ` Baolin Wang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.