* [PATCH 0/2] NVMe/RDMA CQ pool patches for 5.9
From: Max Gurtovoy @ 2020-07-13 8:53 UTC
To: sagi, hch, kbusch, yaminf, linux-nvme
Cc: Max Gurtovoy, israelr, jgg, oren, idanb
This series includes two patches from Yamin that were dropped from the
5.8 merge window because they caused conflicts between the RDMA and
block trees. The patches use the shared CQ API that was merged into the
RDMA core layer to improve performance and reduce resource allocation.
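
For reference, the shared CQ pool API the series builds on (merged into
the RDMA core during the 5.8 cycle, declared in include/rdma/ib_verbs.h)
looks roughly like the sketch below; this is quoted as a reference from
the upstream headers, not part of these patches:

    /* Take a CQ with room for nr_cqe entries from the device's CQ pool,
     * preferring comp_vector_hint; the returned CQ may be shared with
     * other pool users. */
    struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
                                 int comp_vector_hint,
                                 enum ib_poll_context poll_ctx);

    /* Hand the CQ back to the pool, releasing the nr_cqe entries
     * reserved by the matching ib_cq_pool_get() call. */
    void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);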
Yamin Friedman (2):
nvme-rdma: use new shared CQ mechanism
nvmet-rdma: use new shared CQ mechanism
drivers/nvme/host/rdma.c | 77 ++++++++++++++++++++++++++++++----------------
drivers/nvme/target/rdma.c | 14 ++++-----
2 files changed, 58 insertions(+), 33 deletions(-)
--
1.8.3.1
* [PATCH 1/2] nvme-rdma: use new shared CQ mechanism
From: Max Gurtovoy @ 2020-07-13 8:53 UTC
To: sagi, hch, kbusch, yaminf, linux-nvme
Cc: israelr, idanb, jgg, oren, Max Gurtovoy, Or Gerlitz
From: Yamin Friedman <yaminf@mellanox.com>
Have the driver use shared CQs, providing a ~10%-20% performance
improvement as seen in the patch that introduced shared CQs. Instead of
opening a CQ for each QP per connected controller, the RDMA core
provides a CQ for each core that is shared between the QPs on that
core, reducing interrupt overhead. Because a shared CQ serves multiple
QPs, the per-queue context is now taken from wc->qp->qp_context (set up
via init_attr.qp_context) rather than from cq->cq_context.
Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/rdma.c | 77 ++++++++++++++++++++++++++++++++----------------
1 file changed, 51 insertions(+), 26 deletions(-)
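
In outline, CQ setup after this patch follows the sketch below (condensed
from the diff that follows, with error handling omitted). Polling queues
keep a private CQ via ib_alloc_cq(), since the CQ pool does not serve
IB_POLL_DIRECT contexts:

    /* Condensed sketch of nvme_rdma_create_cq(): pool-backed CQ for
     * interrupt-driven queues, private CQ for polling queues. */
    if (nvme_rdma_poll_queue(queue))
            queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
                                       comp_vector, IB_POLL_DIRECT);
    else
            queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
                                          comp_vector, IB_POLL_SOFTIRQ);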
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index e881f87..467da08 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -96,6 +96,7 @@ struct nvme_rdma_queue {
int cm_error;
struct completion cm_done;
bool pi_support;
+ int cq_size;
};
struct nvme_rdma_ctrl {
@@ -275,6 +276,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.recv_cq = queue->ib_cq;
if (queue->pi_support)
init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+ init_attr.qp_context = queue;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -409,6 +411,14 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
return NULL;
}
+static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
+{
+ if (nvme_rdma_poll_queue(queue))
+ ib_free_cq(queue->ib_cq);
+ else
+ ib_cq_pool_put(queue->ib_cq, queue->cq_size);
+}
+
static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_device *dev;
@@ -430,7 +440,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
* the destruction of the QP shouldn't use rdma_cm API.
*/
ib_destroy_qp(queue->qp);
- ib_free_cq(queue->ib_cq);
+ nvme_rdma_free_cq(queue);
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -450,13 +460,42 @@ static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
+static int nvme_rdma_create_cq(struct ib_device *ibdev,
+ struct nvme_rdma_queue *queue)
+{
+ int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
+ enum ib_poll_context poll_ctx;
+
+ /*
+ * Spread I/O queues completion vectors according to their queue index.
+ * Admin queues can always go on completion vector 0.
+ */
+ comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
+
+ /* Polling queues need direct cq polling context */
+ if (nvme_rdma_poll_queue(queue)) {
+ poll_ctx = IB_POLL_DIRECT;
+ queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
+ comp_vector, poll_ctx);
+ } else {
+ poll_ctx = IB_POLL_SOFTIRQ;
+ queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
+ comp_vector, poll_ctx);
+ }
+
+ if (IS_ERR(queue->ib_cq)) {
+ ret = PTR_ERR(queue->ib_cq);
+ return ret;
+ }
+
+ return 0;
+}
+
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
struct ib_device *ibdev;
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
- int comp_vector, idx = nvme_rdma_queue_idx(queue);
- enum ib_poll_context poll_ctx;
int ret, pages_per_mr;
queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -467,26 +506,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
}
ibdev = queue->device->dev;
- /*
- * Spread I/O queues completion vectors according their queue index.
- * Admin queues can always go on completion vector 0.
- */
- comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
-
- /* Polling queues need direct cq polling context */
- if (nvme_rdma_poll_queue(queue))
- poll_ctx = IB_POLL_DIRECT;
- else
- poll_ctx = IB_POLL_SOFTIRQ;
-
/* +1 for ib_stop_cq */
- queue->ib_cq = ib_alloc_cq(ibdev, queue,
- cq_factor * queue->queue_size + 1,
- comp_vector, poll_ctx);
- if (IS_ERR(queue->ib_cq)) {
- ret = PTR_ERR(queue->ib_cq);
+ queue->cq_size = cq_factor * queue->queue_size + 1;
+
+ ret = nvme_rdma_create_cq(ibdev, queue);
+ if (ret)
goto out_put_dev;
- }
ret = nvme_rdma_create_qp(queue, send_wr_factor);
if (ret)
@@ -512,7 +537,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize MR pool sized %d for QID %d\n",
- queue->queue_size, idx);
+ queue->queue_size, nvme_rdma_queue_idx(queue));
goto out_destroy_ring;
}
@@ -523,7 +548,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize PI MR pool sized %d for QID %d\n",
- queue->queue_size, idx);
+ queue->queue_size, nvme_rdma_queue_idx(queue));
goto out_destroy_mr_pool;
}
}
@@ -540,7 +565,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
out_destroy_qp:
rdma_destroy_qp(queue->cm_id);
out_destroy_ib_cq:
- ib_free_cq(queue->ib_cq);
+ nvme_rdma_free_cq(queue);
out_put_dev:
nvme_rdma_dev_put(queue->device);
return ret;
@@ -1163,7 +1188,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
const char *op)
{
- struct nvme_rdma_queue *queue = cq->cq_context;
+ struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -1706,7 +1731,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvme_rdma_qe *qe =
container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
- struct nvme_rdma_queue *queue = cq->cq_context;
+ struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct ib_device *ibdev = queue->device->dev;
struct nvme_completion *cqe = qe->data;
const size_t len = sizeof(struct nvme_completion);
--
1.8.3.1
* [PATCH 2/2] nvmet-rdma: use new shared CQ mechanism
From: Max Gurtovoy @ 2020-07-13 8:53 UTC
To: sagi, hch, kbusch, yaminf, linux-nvme
Cc: israelr, idanb, jgg, oren, Max Gurtovoy, Or Gerlitz
From: Yamin Friedman <yaminf@mellanox.com>
Have the driver use shared CQs, providing a ~10%-20% performance
improvement when multiple disks are used. Instead of opening a CQ for
each QP per controller, the RDMA core provides a CQ for each core that
is shared between the QPs on that core, reducing interrupt overhead.
Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/rdma.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
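
One accounting detail worth noting, sketched below in simplified form
(not the patch itself): ib_cq_pool_put() must be passed the same cqe
count that was reserved by ib_cq_pool_get(), which is why the destroy
path recomputes recv_queue_size + 2 * send_queue_size + 1:

    /* Sketch: get and put must agree on the reserved cqe count. */
    nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
    queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1,
                               queue->comp_vector, IB_POLL_WORKQUEUE);
    /* ... on teardown ... */
    ib_cq_pool_put(queue->cq, nr_cqe + 1);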
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 6731e03..3ccb592 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -752,7 +752,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
- struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
@@ -1008,7 +1008,7 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_cmd *cmd =
container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
- struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct nvmet_rdma_queue *queue = wc->qp->qp_context;
struct nvmet_rdma_rsp *rsp;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
@@ -1258,9 +1258,8 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
*/
nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
- queue->cq = ib_alloc_cq(ndev->device, queue,
- nr_cqe + 1, queue->comp_vector,
- IB_POLL_WORKQUEUE);
+ queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1,
+ queue->comp_vector, IB_POLL_WORKQUEUE);
if (IS_ERR(queue->cq)) {
ret = PTR_ERR(queue->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -1322,7 +1321,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
err_destroy_qp:
rdma_destroy_qp(queue->cm_id);
err_destroy_cq:
- ib_free_cq(queue->cq);
+ ib_cq_pool_put(queue->cq, nr_cqe + 1);
goto out;
}
@@ -1332,7 +1331,8 @@ static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
if (queue->cm_id)
rdma_destroy_id(queue->cm_id);
ib_destroy_qp(queue->qp);
- ib_free_cq(queue->cq);
+ ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 *
+ queue->send_queue_size + 1);
}
static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
--
1.8.3.1
* Re: [PATCH 0/2] NVMe/RDMA CQ pool patches for 5.9
From: Christoph Hellwig @ 2020-07-14 11:18 UTC
To: Max Gurtovoy
Cc: yaminf, sagi, israelr, linux-nvme, hch, idanb, jgg, oren, kbusch
On Mon, Jul 13, 2020 at 11:53:28AM +0300, Max Gurtovoy wrote:
> This series includes two patches from Yamin that were dropped from the
> 5.8 merge window because they caused conflicts between the RDMA and
> block trees. The patches use the shared CQ API that was merged into the
> RDMA core layer to improve performance and reduce resource allocation.
Thanks,
applied to nvme-5.9.