linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Tom Talpey <tom@talpey.com>
To: longli@microsoft.com, Steve French <sfrench@samba.org>,
	linux-cifs@vger.kernel.org, samba-technical@lists.samba.org,
	linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Subject: Re: [Patch v2 10/15] CIFS: SMBD: Support page offset in memory registration
Date: Sat, 23 Jun 2018 22:24:28 -0400	[thread overview]
Message-ID: <fb6189b0-2fe6-fc98-8b3b-d8efaad9cef1@talpey.com> (raw)
In-Reply-To: <20180530194807.31657-11-longli@linuxonhyperv.com>

On 5/30/2018 3:48 PM, Long Li wrote:
> From: Long Li <longli@microsoft.com>
> 
> Change code to pass the correct page offset during memory registration for
> RDMA read/write.
> 
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>   fs/cifs/smb2pdu.c   | 18 ++++++++-----
>   fs/cifs/smbdirect.c | 76 +++++++++++++++++++++++++++++++----------------------
>   fs/cifs/smbdirect.h |  2 +-
>   3 files changed, 58 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
> index f603fbe..fc30774 100644
> --- a/fs/cifs/smb2pdu.c
> +++ b/fs/cifs/smb2pdu.c
> @@ -2623,8 +2623,8 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
>   
>   		rdata->mr = smbd_register_mr(
>   				server->smbd_conn, rdata->pages,
> -				rdata->nr_pages, rdata->tailsz,
> -				true, need_invalidate);
> +				rdata->nr_pages, rdata->page_offset,
> +				rdata->tailsz, true, need_invalidate);
>   		if (!rdata->mr)
>   			return -ENOBUFS;
>   
> @@ -3013,16 +3013,22 @@ smb2_async_writev(struct cifs_writedata *wdata,
>   
>   		wdata->mr = smbd_register_mr(
>   				server->smbd_conn, wdata->pages,
> -				wdata->nr_pages, wdata->tailsz,
> -				false, need_invalidate);
> +				wdata->nr_pages, wdata->page_offset,
> +				wdata->tailsz, false, need_invalidate);
>   		if (!wdata->mr) {
>   			rc = -ENOBUFS;
>   			goto async_writev_out;
>   		}
>   		req->Length = 0;
>   		req->DataOffset = 0;
> -		req->RemainingBytes =
> -			cpu_to_le32((wdata->nr_pages-1)*PAGE_SIZE + wdata->tailsz);
> +		if (wdata->nr_pages > 1)
> +			req->RemainingBytes =
> +				cpu_to_le32(
> +					(wdata->nr_pages - 1) * wdata->pagesz -
> +					wdata->page_offset + wdata->tailsz
> +				);
> +		else
> +			req->RemainingBytes = cpu_to_le32(wdata->tailsz);

Again, I think a helper that computed and returned this size would be
much clearer and more compact. And I am still incredulous that a
single-page I/O always has an offset of zero. :-)

>   		req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
>   		if (need_invalidate)
>   			req->Channel = SMB2_CHANNEL_RDMA_V1;
> diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
> index ba53c52..e459c97 100644
> --- a/fs/cifs/smbdirect.c
> +++ b/fs/cifs/smbdirect.c
> @@ -2299,37 +2299,37 @@ static void smbd_mr_recovery_work(struct work_struct *work)
>   		if (smbdirect_mr->state == MR_INVALIDATED ||
>   			smbdirect_mr->state == MR_ERROR) {
>   
> -			if (smbdirect_mr->state == MR_INVALIDATED) {
> +			/* recover this MR entry */
> +			rc = ib_dereg_mr(smbdirect_mr->mr);
> +			if (rc) {
> +				log_rdma_mr(ERR,
> +					"ib_dereg_mr failed rc=%x\n",
> +					rc);
> +				smbd_disconnect_rdma_connection(info);
> +				continue;
> +			}

Ok, we discussed this ib_dereg_mr() call at the plugfest last week.
It's unnecessary - the MR is reusable and does not need to be destroyed
after each use.

> +
> +			smbdirect_mr->mr = ib_alloc_mr(
> +				info->pd, info->mr_type,
> +				info->max_frmr_depth);
> +			if (IS_ERR(smbdirect_mr->mr)) {
> +				log_rdma_mr(ERR,
> +					"ib_alloc_mr failed mr_type=%x "
> +					"max_frmr_depth=%x\n",
> +					info->mr_type,
> +					info->max_frmr_depth);
> +				smbd_disconnect_rdma_connection(info);
> +				continue;
> +			}
> +

Not needed, for the same reason as above.

> +			if (smbdirect_mr->state == MR_INVALIDATED)
>   				ib_dma_unmap_sg(
>   					info->id->device, smbdirect_mr->sgl,
>   					smbdirect_mr->sgl_count,
>   					smbdirect_mr->dir);
> -				smbdirect_mr->state = MR_READY;

As we observed, the smbdirect_mr is not protected by a lock; therefore,
this MR_READY state transition needs a memory barrier in front of it!

> -			} else if (smbdirect_mr->state == MR_ERROR) {
> -
> -				/* recover this MR entry */
> -				rc = ib_dereg_mr(smbdirect_mr->mr);
> -				if (rc) {
> -					log_rdma_mr(ERR,
> -						"ib_dereg_mr failed rc=%x\n",
> -						rc);
> -					smbd_disconnect_rdma_connection(info);
> -				}

Why are you deleting the MR_ERROR handling? It seems this is precisely
the place where the MR needs to be destroyed, to prevent any later RDMA
operations from potentially reaching the original memory.

>   
> -				smbdirect_mr->mr = ib_alloc_mr(
> -					info->pd, info->mr_type,
> -					info->max_frmr_depth);
> -				if (IS_ERR(smbdirect_mr->mr)) {
> -					log_rdma_mr(ERR,
> -						"ib_alloc_mr failed mr_type=%x "
> -						"max_frmr_depth=%x\n",
> -						info->mr_type,
> -						info->max_frmr_depth);
> -					smbd_disconnect_rdma_connection(info);
> -				}
> +			smbdirect_mr->state = MR_READY;
>   
> -				smbdirect_mr->state = MR_READY;
> -			}
>   			/* smbdirect_mr->state is updated by this function
>   			 * and is read and updated by I/O issuing CPUs trying
>   			 * to get a MR, the call to atomic_inc_return
> @@ -2475,7 +2475,7 @@ static struct smbd_mr *get_mr(struct smbd_connection *info)
>    */
>   struct smbd_mr *smbd_register_mr(
>   	struct smbd_connection *info, struct page *pages[], int num_pages,
> -	int tailsz, bool writing, bool need_invalidate)
> +	int offset, int tailsz, bool writing, bool need_invalidate)
>   {
>   	struct smbd_mr *smbdirect_mr;
>   	int rc, i;
> @@ -2498,17 +2498,31 @@ struct smbd_mr *smbd_register_mr(
>   	smbdirect_mr->sgl_count = num_pages;
>   	sg_init_table(smbdirect_mr->sgl, num_pages);
>   
> -	for (i = 0; i < num_pages - 1; i++)
> -		sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0);
> +	log_rdma_mr(INFO, "num_pages=0x%x offset=0x%x tailsz=0x%x\n",
> +			num_pages, offset, tailsz);
>   
> +	if (num_pages == 1) {
> +		sg_set_page(&smbdirect_mr->sgl[0], pages[0], tailsz, offset);
> +		goto skip_multiple_pages;

A simple "else" would be much preferable to this "goto".

> +	}
> +
> +	/* We have at least two pages to register */
> +	sg_set_page(
> +		&smbdirect_mr->sgl[0], pages[0], PAGE_SIZE - offset, offset);
> +	i = 1;
> +	while (i < num_pages - 1) {
> +		sg_set_page(&smbdirect_mr->sgl[i], pages[i], PAGE_SIZE, 0);
> +		i++;
> +	}
>   	sg_set_page(&smbdirect_mr->sgl[i], pages[i],
>   		tailsz ? tailsz : PAGE_SIZE, 0);
>   
> +skip_multiple_pages:
>   	dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>   	smbdirect_mr->dir = dir;
>   	rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgl, num_pages, dir);
>   	if (!rc) {
> -		log_rdma_mr(INFO, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
> +		log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
>   			num_pages, dir, rc);
>   		goto dma_map_error;
>   	}
> @@ -2516,8 +2530,8 @@ struct smbd_mr *smbd_register_mr(
>   	rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgl, num_pages,
>   		NULL, PAGE_SIZE);
>   	if (rc != num_pages) {
> -		log_rdma_mr(INFO,
> -			"ib_map_mr_sg failed rc = %x num_pages = %x\n",
> +		log_rdma_mr(ERR,
> +			"ib_map_mr_sg failed rc = %d num_pages = %x\n",
>   			rc, num_pages);
>   		goto map_mr_error;
>   	}
> diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
> index f9038da..1e419c2 100644
> --- a/fs/cifs/smbdirect.h
> +++ b/fs/cifs/smbdirect.h
> @@ -321,7 +321,7 @@ struct smbd_mr {
>   /* Interfaces to register and deregister MR for RDMA read/write */
>   struct smbd_mr *smbd_register_mr(
>   	struct smbd_connection *info, struct page *pages[], int num_pages,
> -	int tailsz, bool writing, bool need_invalidate);
> +	int offset, int tailsz, bool writing, bool need_invalidate);
>   int smbd_deregister_mr(struct smbd_mr *mr);
>   
>   #else
> 

  reply	other threads:[~2018-06-24  2:24 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-30 19:47 [Patch v2 00/15] CIFS: Add direct I/O support Long Li
2018-05-30 19:47 ` [Patch v2 01/15] CIFS: Introduce offset for the 1st page in data transfer structures Long Li
2018-05-30 19:47 ` [Patch v2 02/15] CIFS: Add support for direct pages in rdata Long Li
2018-05-30 20:27   ` Ruhl, Michael J
2018-05-30 20:57     ` Long Li
2018-06-24  1:50   ` Tom Talpey
2018-06-25 20:25     ` Long Li
2018-06-25 21:01     ` Jason Gunthorpe
2018-06-26 15:13       ` Tom Talpey
2018-06-27  3:21         ` Long Li
2018-05-30 19:47 ` [Patch v2 03/15] CIFS: Use offset when reading pages Long Li
2018-06-24  1:58   ` Tom Talpey
2018-06-25 20:27     ` Long Li
2018-05-30 19:47 ` [Patch v2 04/15] CIFS: Add support for direct pages in wdata Long Li
2018-06-24  2:01   ` Tom Talpey
2018-06-25 20:34     ` Long Li
2018-05-30 19:47 ` [Patch v2 05/15] CIFS: Calculate the correct request length based on page offset and tail size Long Li
2018-06-24  2:07   ` Tom Talpey
2018-06-25 21:07     ` Long Li
2018-05-30 19:47 ` [Patch v2 06/15] CIFS: Introduce helper function to get page offset and length in smb_rqst Long Li
2018-06-24  2:09   ` Tom Talpey
2018-06-25 21:14     ` Long Li
2018-06-26 13:16       ` Tom Talpey
2018-06-27  3:24         ` Long Li
2018-05-30 19:47 ` [Patch v2 07/15] CIFS: When sending data on socket, pass the correct page offset Long Li
2018-05-30 19:48 ` [Patch v2 08/15] CIFS: SMBD: Support page offset in RDMA send Long Li
2018-06-24  2:11   ` Tom Talpey
2018-06-25 21:23     ` Long Li
2018-05-30 19:48 ` [Patch v2 09/15] CIFS: SMBD: Support page offset in RDMA recv Long Li
2018-06-24  2:16   ` Tom Talpey
2018-06-25 21:29     ` Long Li
2018-05-30 19:48 ` [Patch v2 10/15] CIFS: SMBD: Support page offset in memory registration Long Li
2018-06-24  2:24   ` Tom Talpey [this message]
2018-05-30 19:48 ` [Patch v2 11/15] CIFS: Pass page offset for calculating signature Long Li
2018-06-24  2:27   ` Tom Talpey
2018-06-26  4:15     ` Long Li
2018-05-30 19:48 ` [Patch v2 12/15] CIFS: Pass page offset for encrypting Long Li
2018-06-24  2:28   ` Tom Talpey
2018-05-30 19:48 ` [Patch v2 13/15] CIFS: Add support for direct I/O read Long Li
2018-06-02  5:51   ` kbuild test robot
2018-06-02  7:15   ` kbuild test robot
2018-06-24  2:39   ` Tom Talpey
2018-06-26  4:34     ` Long Li
2018-05-30 19:48 ` [Patch v2 14/15] CIFS: Add support for direct I/O write Long Li
2018-06-24  2:48   ` Tom Talpey
2018-06-26  4:39     ` Long Li
2018-06-26 13:29       ` Tom Talpey
2018-06-27  3:44         ` Long Li
2018-05-30 19:48 ` [Patch v2 15/15] CIFS: Add direct I/O functions to file_operations Long Li
2018-06-07 11:17   ` Pavel Shilovsky

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fb6189b0-2fe6-fc98-8b3b-d8efaad9cef1@talpey.com \
    --to=tom@talpey.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=longli@microsoft.com \
    --cc=samba-technical@lists.samba.org \
    --cc=sfrench@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).