* [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
@ 2017-01-16 20:19 ` Parav Pandit
  0 siblings, 0 replies; 14+ messages in thread
From: Parav Pandit @ 2017-01-16 20:19 UTC (permalink / raw)
  To: hch-jcswGhMUV9g, sagi-NQWnxTmZq1alnMjI0IkVqw,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: parav-VPRAkNaXOzVWk0Htik3J/w

This patch performs DMA sync operations on the nvme_command,
inline data page(s) and nvme_completion.

The nvme_command and write command inline data are synced
(a) on receiving the recv queue completion, for CPU access;
(b) before posting the recv WQE back to the RDMA adapter, for device access.

The nvme_completion is synced
(a) on receiving the send completion for the nvme_completion, for CPU access;
(b) before posting the send WQE to the RDMA adapter, for device access.

This patch is generated for git://git.infradead.org/nvme-fabrics.git
Branch: nvmf-4.10

Signed-off-by: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Reviewed-by: Max Gurtovoy <maxg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
 drivers/nvme/target/rdma.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 6c1c368..fe7e257 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *cmd)
 {
 	struct ib_recv_wr *bad_wr;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		if (cmd->sge[i].length)
+			ib_dma_sync_single_for_device(ndev->device,
+				cmd->sge[0].addr, cmd->sge[0].length,
+				DMA_FROM_DEVICE);
+	}
 
 	if (ndev->srq)
 		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
@@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
 
+	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
+			rsp->send_sge.addr, rsp->send_sge.length,
+			DMA_TO_DEVICE);
+
 	nvmet_rdma_release_rsp(rsp);
 
 	if (unlikely(wc->status != IB_WC_SUCCESS &&
@@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
 		first_wr = &rsp->send_wr;
 
 	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
+
+	ib_dma_sync_single_for_device(rsp->queue->dev->device,
+			rsp->send_sge.addr, rsp->send_sge.length,
+			DMA_TO_DEVICE);
+
 	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
 		pr_err("sending cmd response failed\n");
 		nvmet_rdma_release_rsp(rsp);
@@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
 		struct nvmet_rdma_rsp *cmd)
 {
+	int i;
 	u16 status;
 
 	cmd->queue = queue;
 	cmd->n_rdma = 0;
 	cmd->req.port = queue->port;
 
+	for (i = 0; i < 2; i++) {
+		if (cmd->cmd->sge[i].length)
+			ib_dma_sync_single_for_cpu(queue->dev->device,
+				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
+				DMA_FROM_DEVICE);
+	}
+
 	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
 			&queue->nvme_sq, &nvmet_rdma_ops))
 		return;
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-16 20:19 ` Parav Pandit
@ 2017-01-16 20:31     ` Yuval Shaia
  -1 siblings, 0 replies; 14+ messages in thread
From: Yuval Shaia @ 2017-01-16 20:31 UTC (permalink / raw)
  To: Parav Pandit
  Cc: hch-jcswGhMUV9g, sagi-NQWnxTmZq1alnMjI0IkVqw,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

On Mon, Jan 16, 2017 at 02:19:05PM -0600, Parav Pandit wrote:
> This patch performs dma sync operations on nvme_command,
> inline page(s) and nvme_completion.
> 
> nvme_command and write cmd inline data is synced
> (a) on receiving of the recv queue completion for cpu access.
> (b) before posting recv wqe back to rdma adapter for device access.
> 
> nvme_completion is synced
> (a) on receiving send completion for nvme_completion for cpu access.
> (b) before posting send wqe to rdma adapter for device access.
> 
> This patch is generated for git://git.infradead.org/nvme-fabrics.git
> Branch: nvmf-4.10
> 
> Signed-off-by: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Reviewed-by: Max Gurtovoy <maxg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/nvme/target/rdma.c | 25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 6c1c368..fe7e257 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
>  		struct nvmet_rdma_cmd *cmd)
>  {
>  	struct ib_recv_wr *bad_wr;
> +	int i;
> +
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->sge[i].length)
> +			ib_dma_sync_single_for_device(ndev->device,

Aren't we trying to get rid of all these ib_dma_* wrappers?

> +				cmd->sge[0].addr, cmd->sge[0].length,
> +				DMA_FROM_DEVICE);
> +	}
>  
>  	if (ndev->srq)
>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>  	struct nvmet_rdma_rsp *rsp =
>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>  
> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	nvmet_rdma_release_rsp(rsp);
>  
>  	if (unlikely(wc->status != IB_WC_SUCCESS &&
> @@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
>  		first_wr = &rsp->send_wr;
>  
>  	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
> +
> +	ib_dma_sync_single_for_device(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
>  		pr_err("sending cmd response failed\n");
>  		nvmet_rdma_release_rsp(rsp);
> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
>  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
>  		struct nvmet_rdma_rsp *cmd)
>  {
> +	int i;
>  	u16 status;
>  
>  	cmd->queue = queue;
>  	cmd->n_rdma = 0;
>  	cmd->req.port = queue->port;
>  
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->cmd->sge[i].length)
> +			ib_dma_sync_single_for_cpu(queue->dev->device,
> +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> +				DMA_FROM_DEVICE);
> +	}
> +
>  	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
>  			&queue->nvme_sq, &nvmet_rdma_ops))
>  		return;
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-16 20:31     ` Yuval Shaia
@ 2017-01-16 20:51       ` Parav Pandit
  -1 siblings, 0 replies; 14+ messages in thread
From: Parav Pandit @ 2017-01-16 20:51 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: hch-jcswGhMUV9g, sagi-NQWnxTmZq1alnMjI0IkVqw,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

Hi Yuval,

> -----Original Message-----
> From: Yuval Shaia [mailto:yuval.shaia-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org]
> Sent: Monday, January 16, 2017 2:32 PM
> To: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> Cc: hch-jcswGhMUV9g@public.gmane.org; sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org; linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org; linux-
> rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
> Subject: Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data
> structures
> 
> On Mon, Jan 16, 2017 at 02:19:05PM -0600, Parav Pandit wrote:
> > This patch performs dma sync operations on nvme_command, inline
> > page(s) and nvme_completion.
> >
> > nvme_command and write cmd inline data is synced
> > (a) on receiving of the recv queue completion for cpu access.
> > (b) before posting recv wqe back to rdma adapter for device access.
> >
> > nvme_completion is synced
> > (a) on receiving send completion for nvme_completion for cpu access.
> > (b) before posting send wqe to rdma adapter for device access.
> >
> > This patch is generated for git://git.infradead.org/nvme-fabrics.git
> > Branch: nvmf-4.10
> >
> > Signed-off-by: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > Reviewed-by: Max Gurtovoy <maxg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> > ---
> >  drivers/nvme/target/rdma.c | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> >
> > diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> > index 6c1c368..fe7e257 100644
> > --- a/drivers/nvme/target/rdma.c
> > +++ b/drivers/nvme/target/rdma.c
> > @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct
> nvmet_rdma_device *ndev,
> >  		struct nvmet_rdma_cmd *cmd)
> >  {
> >  	struct ib_recv_wr *bad_wr;
> > +	int i;
> > +
> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->sge[i].length)
> > +			ib_dma_sync_single_for_device(ndev->device,
> 
> Aren't we trying to get rid of all these ib_dma_* wrappers?

Yes, we are. Bart's patch is still not merged; last I checked there was some issue with the SDMA or hfi changes.
I have sent out a patch on top of Bart's patch for the linux-rdma tree using the regular dma_* APIs.
However, Sagi and Christoph acknowledged that this fix needs to go to 4.8-stable and 4.10-rc as a bug fix,
so this patch is generated from the nvme-fabrics tree as mentioned in the commit message.
When this gets to the linux-rdma tree, Bart's new patch will translate these additional calls to the regular dma_* APIs.
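
For context, the translation being referred to would look roughly like the
following. This is only an illustrative sketch of the wrapper removal, not
code from Bart's actual series:

	/* Current form, via the ib_dma_* wrapper: */
	ib_dma_sync_single_for_device(ndev->device,
			cmd->sge[0].addr, cmd->sge[0].length,
			DMA_FROM_DEVICE);

	/* Roughly what it becomes once the wrappers are dropped and the
	 * generic DMA API is called against the HCA's struct device
	 * directly:
	 */
	dma_sync_single_for_device(ndev->device->dma_device,
			cmd->sge[0].addr, cmd->sge[0].length,
			DMA_FROM_DEVICE);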


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-16 20:19 ` Parav Pandit
@ 2017-01-16 21:12     ` Sagi Grimberg
  -1 siblings, 0 replies; 14+ messages in thread
From: Sagi Grimberg @ 2017-01-16 21:12 UTC (permalink / raw)
  To: Parav Pandit, hch-jcswGhMUV9g,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

Hey Parav,

> diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
> index 6c1c368..fe7e257 100644
> --- a/drivers/nvme/target/rdma.c
> +++ b/drivers/nvme/target/rdma.c
> @@ -437,6 +437,14 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
>  		struct nvmet_rdma_cmd *cmd)
>  {
>  	struct ib_recv_wr *bad_wr;
> +	int i;
> +
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->sge[i].length)
> +			ib_dma_sync_single_for_device(ndev->device,
> +				cmd->sge[0].addr, cmd->sge[0].length,
> +				DMA_FROM_DEVICE);
> +	}

a. You test on sge[i] but sync sge[0].
b. I don't think we need the for statement; let's keep it open-coded
for [0] and [1].
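
(For illustration, a minimal sketch of the open-coded form being suggested
here; this is not the code that was eventually merged:)

	if (cmd->sge[0].length)
		ib_dma_sync_single_for_device(ndev->device,
			cmd->sge[0].addr, cmd->sge[0].length,
			DMA_FROM_DEVICE);
	if (cmd->sge[1].length)
		ib_dma_sync_single_for_device(ndev->device,
			cmd->sge[1].addr, cmd->sge[1].length,
			DMA_FROM_DEVICE);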

>
>  	if (ndev->srq)
>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6 +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
>  	struct nvmet_rdma_rsp *rsp =
>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
>
> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);

Why do you need to sync_for_cpu here? You have no interest in the
data at this point.

> +
>  	nvmet_rdma_release_rsp(rsp);
>
>  	if (unlikely(wc->status != IB_WC_SUCCESS &&
> @@ -538,6 +550,11 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
>  		first_wr = &rsp->send_wr;
>
>  	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
> +
> +	ib_dma_sync_single_for_device(rsp->queue->dev->device,
> +			rsp->send_sge.addr, rsp->send_sge.length,
> +			DMA_TO_DEVICE);
> +
>  	if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) {
>  		pr_err("sending cmd response failed\n");
>  		nvmet_rdma_release_rsp(rsp);
> @@ -692,12 +709,20 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
>  static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
>  		struct nvmet_rdma_rsp *cmd)
>  {
> +	int i;
>  	u16 status;
>
>  	cmd->queue = queue;
>  	cmd->n_rdma = 0;
>  	cmd->req.port = queue->port;
>
> +	for (i = 0; i < 2; i++) {
> +		if (cmd->cmd->sge[i].length)
> +			ib_dma_sync_single_for_cpu(queue->dev->device,
> +				cmd->cmd->sge[i].addr, cmd->cmd->sge[i].length,
> +				DMA_FROM_DEVICE);
> +	}

Again, we don't need the for statement.

Also, I think we can optimize a bit by syncing the in-capsule page
only if:
1. it was posted for recv (sge has length)
2. it's a write command
3. it has in-capsule data.

So here, let's sync the sqe (sge[0]) and sync the in-capsule page
in nvmet_rdma_map_sgl_inline().

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-16 21:12     ` Sagi Grimberg
@ 2017-01-16 23:17         ` Parav Pandit
  -1 siblings, 0 replies; 14+ messages in thread
From: Parav Pandit @ 2017-01-16 23:17 UTC (permalink / raw)
  To: Sagi Grimberg, hch-jcswGhMUV9g,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

Hi Sagi,

> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->sge[i].length)
> > +			ib_dma_sync_single_for_device(ndev->device,
> > +				cmd->sge[0].addr, cmd->sge[0].length,
> > +				DMA_FROM_DEVICE);
> > +	}
> 
> a. you test on sge[i] but sync sge[0].
Crap code. I will fix this.

> b. I don't think we need the for statement, lest keep it open-coded for [0]
> and [1].

I put a for loop in because there was high-level agreement in the max_sge thread between Chuck, Steve and Jason about having a generic sg_list/bounce buffer and doing things similar to the RW APIs.
If we generalize at that point, my thought was that this code could eventually move out of every ULP into that generic send() API.
So I put in a for loop to make it more ULP agnostic from the beginning.

> 
> >
> >  	if (ndev->srq)
> >  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> @@ -507,6
> > +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct
> ib_wc *wc)
> >  	struct nvmet_rdma_rsp *rsp =
> >  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp,
> send_cqe);
> >
> > +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> > +			rsp->send_sge.addr, rsp->send_sge.length,
> > +			DMA_TO_DEVICE);
> 
> Why do you need to sync_for_cpu here? you have no interest in the data at
> this point.
> 
Before a CQE can be prepared by the CPU, it needs to be synced,
so once the CQE send has completed, that region is ready for preparing a new CQE.
In the error case the CQE is prepared by the RDMA layer and sent using nvmet_req_complete().
In the happy-path case the CQE is prepared by the core layer before invoking the queue_response() callback of fabric_ops.

In the happy case nvmet-core needs to do the sync for CPU.
In the error case the RDMA layer needs to do the sync for CPU.

Instead of messing with the code in both places, I did the sync_for_cpu in send_done(), which is a unified place.
If there were a generic callback in fabric_ops that could be invoked by __nvmet_req_complete(), then it would be cleaner to do this in a single place by invoking it.
I didn't find it worthwhile to grow fabric_ops for this.
Let me know if you have a different idea to resolve this.

> > nvmet_rdma_rsp *rsp)  static void nvmet_rdma_handle_command(struct
> nvmet_rdma_queue *queue,
> >  		struct nvmet_rdma_rsp *cmd)
> >  {
> > +	int i;
> >  	u16 status;
> >
> >  	cmd->queue = queue;
> >  	cmd->n_rdma = 0;
> >  	cmd->req.port = queue->port;
> >
> > +	for (i = 0; i < 2; i++) {
> > +		if (cmd->cmd->sge[i].length)
> > +			ib_dma_sync_single_for_cpu(queue->dev->device,
> > +				cmd->cmd->sge[i].addr, cmd->cmd-
> >sge[i].length,
> > +				DMA_FROM_DEVICE);
> > +	}
> 
> Again, we don't need the for statement.
> 
> Also, I think we can optimize a bit by syncing the in-capsule page only if:
> 1. it was posted for recv (sge has length) 2. its a write command 3. it has in-
> capsule data.
> 
> So, here lets sync the sqe (sge[0]) and sync the in-capsule page in
> nvmet_rdma_map_sgl_inline().

I agree, Sagi, that this can be deferred to _inline(). I was headed toward doing this in generic code eventually, similar to the RW API,
and thought why not have the clean code now so that the later migration to the new API would be easy.
But if you think we should defer it to a later stage, I am fine with that and will continue with open coding.

Now that I review the code of map_sgl_inline(), I am wondering whether we should sync the INLINE data page at all, because the CPU is not going to read it anyway.
It is only the remote device which will read/write it, and the dma sync to the device is done as part of that driver anyway.
So I should just sync the nvme_command and not the inline data.
That brings me back to open coding to sync only entry [0]. :-)
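
(A minimal sketch of that conclusion, syncing only the nvme_command in
sge[0] from nvmet_rdma_handle_command(); illustrative only, not necessarily
the final merged code:)

	/* Only the SQE is read by the CPU; the in-capsule data is consumed
	 * by the device, so it needs no sync here.
	 */
	if (cmd->cmd->sge[0].length)
		ib_dma_sync_single_for_cpu(queue->dev->device,
			cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
			DMA_FROM_DEVICE);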



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-16 23:17         ` Parav Pandit
@ 2017-01-17  8:07             ` Sagi Grimberg
  -1 siblings, 0 replies; 14+ messages in thread
From: Sagi Grimberg @ 2017-01-17  8:07 UTC (permalink / raw)
  To: Parav Pandit, hch-jcswGhMUV9g,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

>>> +	for (i = 0; i < 2; i++) {
>>> +		if (cmd->sge[i].length)
>>> +			ib_dma_sync_single_for_device(ndev->device,
>>> +				cmd->sge[0].addr, cmd->sge[0].length,
>>> +				DMA_FROM_DEVICE);
>>> +	}
>>
>> a. you test on sge[i] but sync sge[0].
> Crap code. I will fix this.
>
>> b. I don't think we need the for statement, lest keep it open-coded for [0]
>> and [1].
>
> I put for loop because, there was high level agreement in max_sge thread chain between Chuck, Steve and Jason about having generic sg_list/bounce buffer and doing things similar to RW APIs.
> Now if we generalize at that point, my thoughts were, that this code can eventually move out from every ULP to that generic send() API.
> So I put a for loop to make is more ULP agnostic from beginning.

Let's start simple and clear; if we indeed do this (and I should
check out this discussion) we'll move it out like all the rest of
the ULPs.

>>>  	if (ndev->srq)
>>>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
>> @@ -507,6
>>> +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct
>> ib_wc *wc)
>>>  	struct nvmet_rdma_rsp *rsp =
>>>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp,
>> send_cqe);
>>>
>>> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
>>> +			rsp->send_sge.addr, rsp->send_sge.length,
>>> +			DMA_TO_DEVICE);
>>
>> Why do you need to sync_for_cpu here? you have no interest in the data at
>> this point.
>>
> Before a cqe can be prepared by cpu, it needs to be synced.
> So once CQE send is completed, that region is ready for preparing new CQE.
> In error case cqe is prepared by the RDMA layer and sent using nvmet_req_complete.
> In happy path case cqe is prepared by the core layer before invoking queue_response() callback of fabric_ops.
>
> In happy case nvmet-core needs to do the sync_to_cpu.
> In error case rdma layer needs to do the sync_to_cpu.
>
> Instead of messing code at both places, I did the sync_for_cpu in send_done() which is unified place.
> If there is generic callback in fabric_ops that can be invoked by __nvmet_req_complete(), than its cleaner to do at single place by invoking it.
> I didn't find it worth enough to increase fabric_ops for this.
> Let me know if you have different idea to resolve this.

Why not sync it when you start using the cmd at nvmet_rdma_recv_done()?
--
@@ -747,6 +747,10 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, 
struct ib_wc *wc)
         rsp->flags = 0;
         rsp->req.cmd = cmd->nvme_cmd;

+       ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
+                       rsp->send_sge.addr, rsp->send_sge.length,
+                       DMA_TO_DEVICE);
+
         if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
                 unsigned long flags;
--

>
>>> nvmet_rdma_rsp *rsp)  static void nvmet_rdma_handle_command(struct
>> nvmet_rdma_queue *queue,
>>>  		struct nvmet_rdma_rsp *cmd)
>>>  {
>>> +	int i;
>>>  	u16 status;
>>>
>>>  	cmd->queue = queue;
>>>  	cmd->n_rdma = 0;
>>>  	cmd->req.port = queue->port;
>>>
>>> +	for (i = 0; i < 2; i++) {
>>> +		if (cmd->cmd->sge[i].length)
>>> +			ib_dma_sync_single_for_cpu(queue->dev->device,
>>> +				cmd->cmd->sge[i].addr, cmd->cmd-
>>> sge[i].length,
>>> +				DMA_FROM_DEVICE);
>>> +	}
>>
>> Again, we don't need the for statement.
>>
>> Also, I think we can optimize a bit by syncing the in-capsule page only if:
>> 1. it was posted for recv (sge has length) 2. its a write command 3. it has in-
>> capsule data.
>>
>> So, here lets sync the sqe (sge[0]) and sync the in-capsule page in
>> nvmet_rdma_map_sgl_inline().
>
> I agree Sagi that this can be differed to _inline(). I was headed to do this in generic code eventually similar to RW API.
> And thought why not have the clean code now so that migration later to new API would be easy.
> But if you think we should differ it to later stage, I am fine with that and continue with open coding.
>
> Now when I review the code of map_sgl_inline() I am wondering that we should not sync the INLINE data page at all because cpu is not going to read it anyway.
> Its only the remote device which will read/write and it will do the dma_sync_to_device as part of that driver anyway.

Yea, you're right. Let's kill it.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures
  2017-01-17  8:07             ` Sagi Grimberg
@ 2017-01-17 16:03                 ` Parav Pandit
  -1 siblings, 0 replies; 14+ messages in thread
From: Parav Pandit @ 2017-01-17 16:03 UTC (permalink / raw)
  To: Sagi Grimberg, hch-jcswGhMUV9g,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	dledford-H+wXaHxf7aLQT0dZR+AlfA

Hi Sagi,

> -----Original Message-----
> From: Sagi Grimberg [mailto:sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org]
> Sent: Tuesday, January 17, 2017 2:08 AM
> To: Parav Pandit <parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>; hch-jcswGhMUV9g@public.gmane.org; linux-
> nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org; linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org;
> dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
> Subject: Re: [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data
> structures
> 
> >>> +	for (i = 0; i < 2; i++) {
> >>> +		if (cmd->sge[i].length)
> >>> +			ib_dma_sync_single_for_device(ndev->device,
> >>> +				cmd->sge[0].addr, cmd->sge[0].length,
> >>> +				DMA_FROM_DEVICE);
> >>> +	}
> >>
> >> a. you test on sge[i] but sync sge[0].
> > Crap code. I will fix this.
> >
> >> b. I don't think we need the for statement, lest keep it open-coded
> >> for [0] and [1].
> >
> > I put for loop because, there was high level agreement in max_sge thread
> chain between Chuck, Steve and Jason about having generic sg_list/bounce
> buffer and doing things similar to RW APIs.
> > Now if we generalize at that point, my thoughts were, that this code can
> eventually move out from every ULP to that generic send() API.
> > So I put a for loop to make is more ULP agnostic from beginning.
> 
> Lets start simple and clear, if we indeed do this (and I should checkout this
> discussion) we'll move it out like all the rest of the ULPs.
> 
Yes. That's why I changed the code to use send_sge.length instead of sizeof(nvme_completion).

> >>>  	if (ndev->srq)
> >>>  		return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
> >> @@ -507,6
> >>> +515,10 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct
> >> ib_wc *wc)
> >>>  	struct nvmet_rdma_rsp *rsp =
> >>>  		container_of(wc->wr_cqe, struct nvmet_rdma_rsp,
> >> send_cqe);
> >>>
> >>> +	ib_dma_sync_single_for_cpu(rsp->queue->dev->device,
> >>> +			rsp->send_sge.addr, rsp->send_sge.length,
> >>> +			DMA_TO_DEVICE);
> >>
> >> Why do you need to sync_for_cpu here? you have no interest in the
> >> data at this point.
> >>
> > Before a cqe can be prepared by cpu, it needs to be synced.
> > So once CQE send is completed, that region is ready for preparing new
> CQE.
> > In error case cqe is prepared by the RDMA layer and sent using
> nvmet_req_complete.
> > In happy path case cqe is prepared by the core layer before invoking
> queue_response() callback of fabric_ops.
> >
> > In happy case nvmet-core needs to do the sync_to_cpu.
> > In error case rdma layer needs to do the sync_to_cpu.
> >
> > Instead of messing code at both places, I did the sync_for_cpu in
> send_done() which is unified place.
> > If there is generic callback in fabric_ops that can be invoked by
> __nvmet_req_complete(), than its cleaner to do at single place by invoking it.
> > I didn't find it worth enough to increase fabric_ops for this.
> > Let me know if you have different idea to resolve this.
> 
> Why not sync it when you start using the cmd at nvmet_rdma_recv_done()?
> --
Yes, that works too.
nvmet_rdma_handle_command() seems a better place to me, where the sync for CPU is done for the nvme_command as well.
That makes it more readable too.
Having it in handle_command() also covers the case of receiving RQEs before the QP moves to the established state.
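
(A rough sketch of nvmet_rdma_handle_command() with both syncs placed there,
as suggested above; field names follow the existing driver, but this is
illustrative and may differ from the patch that was finally merged:)

	/* Sync the SQE for CPU access before nvmet_req_init() parses it. */
	ib_dma_sync_single_for_cpu(queue->dev->device,
			cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
			DMA_FROM_DEVICE);

	/* Hand the response buffer back to the CPU before the core writes
	 * the next nvme_completion into it.
	 */
	ib_dma_sync_single_for_cpu(queue->dev->device,
			cmd->send_sge.addr, cmd->send_sge.length,
			DMA_TO_DEVICE);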

> > Now when I review the code of map_sgl_inline() I am wondering that we
> should not sync the INLINE data page at all because cpu is not going to read it
> anyway.
> > Its only the remote device which will read/write and it will do the
> dma_sync_to_device as part of that driver anyway.
> 
> Yea, you're right. Let's kill it.
Ok.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-01-17 16:03 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-16 20:19 [PATCHv1] nvmet-rdma: Fix missing dma sync to nvme data structures Parav Pandit
2017-01-16 20:19 ` Parav Pandit
     [not found] ` <1484597945-31143-1-git-send-email-parav-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2017-01-16 20:31   ` Yuval Shaia
2017-01-16 20:31     ` Yuval Shaia
2017-01-16 20:51     ` Parav Pandit
2017-01-16 20:51       ` Parav Pandit
2017-01-16 21:12   ` Sagi Grimberg
2017-01-16 21:12     ` Sagi Grimberg
     [not found]     ` <ff2d3e9b-323c-aae5-f2dc-b18da5e0dc09-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
2017-01-16 23:17       ` Parav Pandit
2017-01-16 23:17         ` Parav Pandit
     [not found]         ` <VI1PR0502MB30083D014DF2D4C0BEEA9F17D17D0-o1MPJYiShExKsLr+rGaxW8DSnupUy6xnnBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-01-17  8:07           ` Sagi Grimberg
2017-01-17  8:07             ` Sagi Grimberg
     [not found]             ` <77902aab-5d1c-5755-ca46-679d737f496d-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
2017-01-17 16:03               ` Parav Pandit
2017-01-17 16:03                 ` Parav Pandit
