From: Yamin Friedman <yaminf@mellanox.com>
To: Jason Gunthorpe <jgg@mellanox.com>,
Sagi Grimberg <sagi@grimberg.me>,
Or Gerlitz <ogerlitz@mellanox.com>,
Leon Romanovsky <leonro@mellanox.com>
Cc: linux-rdma@vger.kernel.org, Yamin Friedman <yaminf@mellanox.com>
Subject: [PATCH V3 3/4] nvme-rdma: use new shared CQ mechanism
Date: Tue, 19 May 2020 15:43:35 +0300
Message-ID: <1589892216-39283-4-git-send-email-yaminf@mellanox.com>
In-Reply-To: <1589892216-39283-1-git-send-email-yaminf@mellanox.com>
Have the driver use shared CQs, which provides a ~10-20% performance
improvement as measured in the patch introducing the shared CQ pool.
Instead of opening a dedicated CQ for each QP of each connected
controller, every QP now takes its CQ from a per-device pool managed by
the RDMA core, so that QPs serviced by the same core share a CQ and
interrupt overhead is reduced.

Since a shared CQ can serve several QPs, cq->cq_context no longer
identifies a single queue; the completion handlers therefore recover
the queue from wc->qp->qp_context, which is set at QP creation time.
Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
---
drivers/nvme/host/rdma.c | 75 ++++++++++++++++++++++++++++++++----------------
1 file changed, 50 insertions(+), 25 deletions(-)
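
For readers unfamiliar with the pool API added in patch 2/4, below is a
minimal, hypothetical sketch (not part of this patch) of a ULP
queue-setup path using it; struct my_queue, my_queue_setup(), and the
sizing constants are invented for illustration:

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

/* Invented example type; a real ULP has its own queue struct. */
struct my_queue {
	struct ib_cq *ib_cq;
	int cq_size;
	int queue_size;
};

static int my_queue_setup(struct ib_device *ibdev, struct rdma_cm_id *cm_id,
			  struct ib_pd *pd, struct my_queue *queue,
			  int comp_vector)
{
	const int cq_factor = 4;	/* MR + SEND + INV + RECV, as in this driver */
	struct ib_qp_init_attr init_attr = {};
	int ret;

	/* Size the CQ for all WRs the QP can post; +1 for ib_stop_cq. */
	queue->cq_size = cq_factor * queue->queue_size + 1;

	/* Take a CQ from the per-device pool instead of ib_alloc_cq(). */
	queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size, comp_vector,
				      IB_POLL_SOFTIRQ);
	if (IS_ERR(queue->ib_cq))
		return PTR_ERR(queue->ib_cq);

	init_attr.qp_type = IB_QPT_RC;
	init_attr.send_cq = queue->ib_cq;
	init_attr.recv_cq = queue->ib_cq;
	/*
	 * The CQ may be shared with other QPs, so hang the per-queue
	 * context off the QP; completion handlers read it back through
	 * wc->qp->qp_context instead of cq->cq_context.
	 */
	init_attr.qp_context = queue;
	/* init_attr.cap sizing omitted for brevity. */

	ret = rdma_create_qp(cm_id, pd, &init_attr);
	if (ret)
		/* Return the CQ to the pool with the size it was taken at. */
		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
	return ret;
}

nvme_rdma_create_cq() and nvme_rdma_free_cq() in the diff below follow
this pattern, keeping ib_alloc_cq()/ib_free_cq() only for polling
queues, since the pool does not serve IB_POLL_DIRECT contexts.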
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cac8a93..83d5f29 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -85,6 +85,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
+ int cq_size;
};
struct nvme_rdma_ctrl {
@@ -261,6 +262,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
+ init_attr.qp_context = queue;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -389,6 +391,14 @@ static int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
return NULL;
}
+static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
+{
+ if (nvme_rdma_poll_queue(queue))
+ ib_free_cq(queue->ib_cq);
+ else
+ ib_cq_pool_put(queue->ib_cq, queue->cq_size);
+}
+
static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_device *dev;
@@ -408,7 +418,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
* the destruction of the QP shouldn't use rdma_cm API.
*/
ib_destroy_qp(queue->qp);
- ib_free_cq(queue->ib_cq);
+ nvme_rdma_free_cq(queue);
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -422,13 +432,42 @@ static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
ibdev->attrs.max_fast_reg_page_list_len - 1);
}
+static int nvme_rdma_create_cq(struct ib_device *ibdev,
+ struct nvme_rdma_queue *queue)
+{
+ int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
+ enum ib_poll_context poll_ctx;
+
+ /*
+ * Spread I/O queues completion vectors according their queue index.
+ * Admin queues can always go on completion vector 0.
+ */
+ comp_vector = idx == 0 ? idx : idx - 1;
+
+ /* Polling queues need direct cq polling context */
+ if (nvme_rdma_poll_queue(queue)) {
+ poll_ctx = IB_POLL_DIRECT;
+ queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
+ comp_vector, poll_ctx);
+ } else {
+ poll_ctx = IB_POLL_SOFTIRQ;
+ queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
+ comp_vector, poll_ctx);
+ }
+
+ if (IS_ERR(queue->ib_cq)) {
+ ret = PTR_ERR(queue->ib_cq);
+ return ret;
+ }
+
+ return 0;
+}
+
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
struct ib_device *ibdev;
const int send_wr_factor = 3; /* MR, SEND, INV */
const int cq_factor = send_wr_factor + 1; /* + RECV */
- int comp_vector, idx = nvme_rdma_queue_idx(queue);
- enum ib_poll_context poll_ctx;
int ret, pages_per_mr;
queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -439,26 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
}
ibdev = queue->device->dev;
- /*
- * Spread I/O queues completion vectors according their queue index.
- * Admin queues can always go on completion vector 0.
- */
- comp_vector = idx == 0 ? idx : idx - 1;
-
- /* Polling queues need direct cq polling context */
- if (nvme_rdma_poll_queue(queue))
- poll_ctx = IB_POLL_DIRECT;
- else
- poll_ctx = IB_POLL_SOFTIRQ;
-
/* +1 for ib_stop_cq */
- queue->ib_cq = ib_alloc_cq(ibdev, queue,
- cq_factor * queue->queue_size + 1,
- comp_vector, poll_ctx);
- if (IS_ERR(queue->ib_cq)) {
- ret = PTR_ERR(queue->ib_cq);
+ queue->cq_size = cq_factor * queue->queue_size + 1;
+
+ ret = nvme_rdma_create_cq(ibdev, queue);
+ if (ret)
goto out_put_dev;
- }
ret = nvme_rdma_create_qp(queue, send_wr_factor);
if (ret)
@@ -484,7 +509,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize MR pool sized %d for QID %d\n",
- queue->queue_size, idx);
+ queue->queue_size, nvme_rdma_queue_idx(queue));
goto out_destroy_ring;
}
@@ -498,7 +523,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
out_destroy_qp:
rdma_destroy_qp(queue->cm_id);
out_destroy_ib_cq:
- ib_free_cq(queue->ib_cq);
+ nvme_rdma_free_cq(queue);
out_put_dev:
nvme_rdma_dev_put(queue->device);
return ret;
@@ -1093,7 +1118,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
const char *op)
{
- struct nvme_rdma_queue *queue = cq->cq_context;
+ struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -1481,7 +1506,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvme_rdma_qe *qe =
container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
- struct nvme_rdma_queue *queue = cq->cq_context;
+ struct nvme_rdma_queue *queue = wc->qp->qp_context;
struct ib_device *ibdev = queue->device->dev;
struct nvme_completion *cqe = qe->data;
const size_t len = sizeof(struct nvme_completion);
--
1.8.3.1