From: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
Cc: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>,
	Jason Gunthorpe
	<jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>,
	idanb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org
Subject: [PATCH 2/3] nvme-rdma: don't complete requests before a send work request has completed
Date: Tue, 31 Oct 2017 10:55:21 +0200	[thread overview]
Message-ID: <1509440122-1190-3-git-send-email-sagi@grimberg.me> (raw)
In-Reply-To: <1509440122-1190-1-git-send-email-sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>

In order to guarantee that the HCA will never get an access violation
(either from an invalidated rkey or from the iommu) when retrying a send
operation, we must complete a request only when both the send completion
and the nvme cqe have arrived.

Only then is it safe to invalidate the rkey (if needed), unmap the
host buffers, and complete the I/O.

Signed-off-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
---
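A minimal, self-contained sketch of the gating this patch adds (illustrative
names only, not the driver's actual code): a request is finished from
whichever completion arrives last, send work completion or controller
response.  This assumes, as in nvme-rdma, that both completions of a queue
are processed serially on the same CQ context, so plain flags suffice:

#include <stdbool.h>
#include <stdio.h>

struct io_request {
	bool send_completed;	/* send work request completion seen */
	bool resp_completed;	/* controller response (CQE) seen */
	unsigned short status;	/* latched completion status */
};

static void finish_request(struct io_request *req)
{
	/* Only here is it safe to invalidate the rkey and unmap buffers. */
	printf("request completed, status 0x%x\n", req->status);
}

/* Invoked when the send work request completes on the CQ. */
static void send_done(struct io_request *req)
{
	req->send_completed = true;
	if (req->resp_completed)
		finish_request(req);
}

/* Invoked when the controller's response arrives on the same CQ. */
static void response_done(struct io_request *req, unsigned short status)
{
	req->status = status;
	req->resp_completed = true;
	if (req->send_completed)
		finish_request(req);
}

int main(void)
{
	struct io_request a = { false, false, 0 };
	struct io_request b = { false, false, 0 };

	/* Response first, send completion second. */
	response_done(&a, 0);
	send_done(&a);

	/* Send completion first, response second. */
	send_done(&b);
	response_done(&b, 0);
	return 0;
}
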
 drivers/nvme/host/rdma.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ccbae327fe72..ae1fb66358f7 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -67,6 +67,9 @@ struct nvme_rdma_request {
 	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
+	struct nvme_completion	cqe;
+	bool			send_completed;
+	bool			resp_completed;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
@@ -961,6 +964,8 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	req->num_sge = 1;
 	req->inline_data = false;
 	req->mr->need_inval = false;
+	req->send_completed = false;
+	req->resp_completed = false;
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -997,13 +1002,25 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	struct nvme_rdma_qe *qe =
+		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+	struct nvme_rdma_request *req =
+		container_of(qe, struct nvme_rdma_request, sqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "SEND");
+		return;
+	}
+
+	req->send_completed = true;
+	if (req->resp_completed)
+		nvme_end_request(rq, req->cqe.status, req->cqe.result);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
-		struct ib_send_wr *first)
+		struct ib_send_wr *first, bool signal)
 {
 	struct ib_send_wr wr, *bad_wr;
 	int ret;
@@ -1019,7 +1036,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	wr.sg_list    = sge;
 	wr.num_sge    = num_sge;
 	wr.opcode     = IB_WR_SEND;
-	wr.send_flags = IB_SEND_SIGNALED;
+	wr.send_flags = signal ? IB_SEND_SIGNALED : 0;
 
 	if (first)
 		first->next = &wr;
@@ -1093,7 +1110,7 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
 			DMA_TO_DEVICE);
 
-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
+	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
 	WARN_ON_ONCE(ret);
 }
 
@@ -1117,11 +1134,17 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	if (rq->tag == tag)
 		ret = 1;
 
+	req->cqe.status = cqe->status;
+	req->cqe.result = cqe->result;
+	req->resp_completed = true;
+
 	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
 	    wc->ex.invalidate_rkey == req->mr->rkey)
 		req->mr->need_inval = false;
 
-	nvme_end_request(rq, cqe->status, cqe->result);
+	if (req->send_completed)
+		nvme_end_request(rq, req->cqe.status, req->cqe.result);
+
 	return ret;
 }
 
@@ -1410,7 +1433,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->mr->need_inval ? &req->reg_wr.wr : NULL);
+			req->mr->need_inval ? &req->reg_wr.wr : NULL, true);
 	if (unlikely(err)) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
-- 
2.7.4
