From: David Howells <dhowells@redhat.com>
To: Steve French <smfrench@gmail.com>, Al Viro <viro@zeniv.linux.org.uk>
Cc: Steve French <sfrench@samba.org>,
Shyam Prasad N <nspmangalore@gmail.com>,
Rohith Surabattula <rohiths.msft@gmail.com>,
Tom Talpey <tom@talpey.com>,
Christoph Hellwig <hch@infradead.org>,
Matthew Wilcox <willy@infradead.org>,
Jeff Layton <jlayton@kernel.org>,
linux-cifs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org,
dhowells@redhat.com
Subject: [RFC PATCH 06/12] cifs: Add a function to build an RDMA SGE list from an iterator
Date: Tue, 01 Nov 2022 16:31:28 +0000
Message-ID: <166732028840.3186319.8512284239779728860.stgit@warthog.procyon.org.uk>
In-Reply-To: <166732024173.3186319.18204305072070871546.stgit@warthog.procyon.org.uk>
Add a function that extracts page fragments from a BVEC-, KVEC- or
XARRAY-type iterator, DMA-maps them and appends them to an RDMA SGE list,
stopping when the maximum number of elements is reached.
Nothing is done to make sure the pages remain present - that must be done
by the caller.
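
As a sketch of the intended calling pattern (illustrative only - the local
names "iter" and "count" are hypothetical; patch 11 wires this into the
actual I/O path), a caller that already holds references on the source
pages might do:

	struct ib_sge sges[16];
	struct smb_extract_to_rdma rdma = {
		.sge		= sges,
		.nr_sge		= 0,
		.max_sge	= ARRAY_SIZE(sges),
		.device		= info->id->device,
		.local_dma_lkey	= info->pd->local_dma_lkey,
		.direction	= DMA_TO_DEVICE,
	};
	ssize_t n;

	n = smb_extract_iter_to_rdma(iter, count, &rdma);
	if (n < 0)
		return n;
	/* sges[0..rdma.nr_sge - 1] are now DMA-mapped and ready to post. */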
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-rdma@vger.kernel.org
---
fs/cifs/smbdirect.c | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 243 insertions(+)
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 90789aaa6567..18fb440a02c3 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -44,6 +44,22 @@ static int smbd_post_send_page(struct smbd_connection *info,
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);
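+/*
+ * Parameter block for smb_extract_iter_to_rdma(): an SGE table to append to,
+ * where nr_sge is the next free slot and max_sge the table capacity, plus
+ * the IB device, local lkey and DMA direction used to map each fragment.
+ */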
+struct smb_extract_to_rdma {
+ struct ib_sge *sge;
+ unsigned int nr_sge;
+ unsigned int max_sge;
+ struct ib_device *device;
+ u32 local_dma_lkey;
+ enum dma_data_direction direction;
+};
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+ struct smb_extract_to_rdma *rdma);
+
/* SMBD version number */
#define SMBD_V1 0x0100
@@ -2489,3 +2500,230 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
return rc;
}
+
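+/*
+ * Set the next free slot in the SGE table: DMA-map the given page fragment
+ * and, if that succeeds, record the bus address, length and local lkey.
+ */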
+static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
+ struct page *lowest_page, size_t off, size_t len)
+{
+ struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
+ u64 addr;
+
+ addr = ib_dma_map_page(rdma->device, lowest_page,
+ off, len, rdma->direction);
+ if (ib_dma_mapping_error(rdma->device, addr))
+ return false;
+
+ sge->addr = addr;
+ sge->length = len;
+ sge->lkey = rdma->local_dma_lkey;
+ rdma->nr_sge++;
+ return true;
+}
+
+/*
+ * Extract page fragments from a BVEC-class iterator and add them to an RDMA
+ * element list. The pages are not pinned.
+ */
+static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
+ struct smb_extract_to_rdma *rdma,
+ ssize_t maxsize)
+{
+ const struct bio_vec *bv = iter->bvec;
+ unsigned long start = iter->iov_offset;
+	unsigned int i, sge_max = rdma->max_sge - rdma->nr_sge;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ size_t off, len;
+
+ len = bv[i].bv_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ len = min_t(size_t, maxsize, len - start);
+ off = bv[i].bv_offset + start;
+
+ if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
+ return -EIO;
+ sge_max--;
+
+ ret += len;
+ maxsize -= len;
+ if (maxsize <= 0 || sge_max == 0)
+ break;
+ start = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * Extract fragments from a KVEC-class iterator and add them to an RDMA list.
+ * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
+ * The pages are not pinned.
+ */
+static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
+ struct smb_extract_to_rdma *rdma,
+ ssize_t maxsize)
+{
+ const struct kvec *kv = iter->kvec;
+ unsigned long start = iter->iov_offset;
+	unsigned int i, sge_max = rdma->max_sge - rdma->nr_sge;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ struct page *page;
+ unsigned long kaddr;
+ size_t off, len, seg;
+
+ len = kv[i].iov_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ kaddr = (unsigned long)kv[i].iov_base + start;
+ off = kaddr & ~PAGE_MASK;
+ len = min_t(size_t, maxsize, len - start);
+ kaddr &= PAGE_MASK;
+
+ maxsize -= len;
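+		/* A segment may span page boundaries, and vmalloc'd memory is
+		 * only virtually contiguous, so look up and map the underlying
+		 * pages one at a time. */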
+		do {
+			seg = min_t(size_t, len, PAGE_SIZE - off);
+
+			if (is_vmalloc_or_module_addr((void *)kaddr))
+				page = vmalloc_to_page((void *)kaddr);
+			else
+				page = virt_to_page(kaddr);
+
+			if (!smb_set_sge(rdma, page, off, seg))
+				return -EIO;
+			sge_max--;
+
+			ret += seg;
+			len -= seg;
+			kaddr += PAGE_SIZE;
+			off = 0;
+		} while (len > 0 && sge_max > 0);
+
+ if (maxsize <= 0 || sge_max == 0)
+ break;
+ start = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * Extract folio fragments from an XARRAY-class iterator and add them to an
+ * RDMA list. The folios are not pinned.
+ */
+static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
+ struct smb_extract_to_rdma *rdma,
+ ssize_t maxsize)
+{
+ struct xarray *xa = iter->xarray;
+ struct folio *folio;
+	unsigned int sge_max = rdma->max_sge - rdma->nr_sge;
+ loff_t start = iter->xarray_start + iter->iov_offset;
+ pgoff_t index = start / PAGE_SIZE;
+	ssize_t ret = 0;
+ size_t off, len;
+ XA_STATE(xas, xa, index);
+
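+	/* The xarray is walked under the RCU read lock; no references are
+	 * taken on the folios, so the caller must keep them present. */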
+ rcu_read_lock();
+
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (xas_retry(&xas, folio))
+ continue;
+ if (WARN_ON(xa_is_value(folio)))
+ break;
+ if (WARN_ON(folio_test_hugetlb(folio)))
+ break;
+
+ off = offset_in_folio(folio, start);
+ len = min_t(size_t, maxsize, folio_size(folio) - off);
+
+ if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
+			rcu_read_unlock();
+ return -EIO;
+ }
+ sge_max--;
+
+		maxsize -= len;
+		ret += len;
+		start += len;
+ if (maxsize <= 0 || sge_max == 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ return ret;
+}
+
+/*
+ * Extract page fragments from up to the given amount of the source iterator
+ * and build up an RDMA list that refers to all of those bits. The RDMA list
+ * is appended to, up to the maximum number of elements set in the parameter
+ * block.
+ *
+ * The extracted page fragments are not pinned or ref'd in any way; if an
+ * IOVEC/UBUF-type iterator is to be used, it should be converted to a
+ * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
+ * way.
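+ *
+ * On success, the iterator is advanced over the bytes extracted; if the SGE
+ * table fills up first, a short count is returned and the caller can post
+ * what has been mapped so far before calling this again.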
+ */
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+ struct smb_extract_to_rdma *rdma)
+{
+ ssize_t ret;
+ int before = rdma->nr_sge;
+
+ if (iov_iter_is_discard(iter) ||
+ iov_iter_is_pipe(iter) ||
+ user_backed_iter(iter)) {
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+
+ switch (iov_iter_type(iter)) {
+ case ITER_BVEC:
+ ret = smb_extract_bvec_to_rdma(iter, rdma, len);
+ break;
+ case ITER_KVEC:
+ ret = smb_extract_kvec_to_rdma(iter, rdma, len);
+ break;
+ case ITER_XARRAY:
+ ret = smb_extract_xarray_to_rdma(iter, rdma, len);
+ break;
+ default:
+ BUG();
+ }
+
+ if (ret > 0) {
+ iov_iter_advance(iter, ret);
+ } else if (ret < 0) {
+ while (rdma->nr_sge > before) {
+			struct ib_sge *sge = &rdma->sge[--rdma->nr_sge];
+
+			ib_dma_unmap_page(rdma->device, sge->addr, sge->length,
+					  rdma->direction);
+ sge->addr = 0;
+ }
+ }
+
+ return ret;
+}