linux-cifs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: linux-fsdevel@vger.kernel.org
Cc: dhowells@redhat.com, Jeff Layton <jlayton@redhat.com>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Anna Schumaker <anna.schumaker@netapp.com>,
	Steve French <sfrench@samba.org>,
	Dominique Martinet <asmadeus@codewreck.org>,
	Mike Marshall <hubcap@omnibond.com>,
	David Wysochanski <dwysocha@redhat.com>,
	Shyam Prasad N <nspmangalore@gmail.com>,
	Miklos Szeredi <miklos@szeredi.hu>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-cachefs@redhat.com, linux-afs@lists.infradead.org,
	linux-nfs@vger.kernel.org, linux-cifs@vger.kernel.org,
	ceph-devel@vger.kernel.org, v9fs-developer@lists.sourceforge.net,
	devel@lists.orangefs.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH 04/12] netfs: Use a buffer in netfs_read_request and add pages to it
Date: Wed, 21 Jul 2021 14:45:24 +0100	[thread overview]
Message-ID: <162687512469.276387.15723958695928327041.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <162687506932.276387.14456718890524355509.stgit@warthog.procyon.org.uk>

Add an "output" buffer to the netfs_read_request struct.  This is an xarray
to which the intended destination pages can be added, supplemented by
additional pages to make the buffer up to a sufficient size to be the
output for an overlarge read, decryption and/or decompression.

The readahead_expand() function will only expand the requested pageset up
to a point where it runs into an already extant page at either end - which
means that the resulting buffer might not be large enough or may be
misaligned for our purposes.

With this, we can make sure we have a useful buffer and we can splice the
extra pages from it into the pagecache if there are holes we can plug.

The read buffer could also be useful in the future to perform RMW cycles
when fixing up after disconnected operation or direct I/O with
smaller-than-preferred granularity.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/netfs/read_helper.c |  166 ++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/netfs.h  |    1 
 2 files changed, 154 insertions(+), 13 deletions(-)

diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 5e1a9be48130..b03bc5b0da5a 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -28,6 +28,7 @@ module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
 
 static void netfs_rreq_work(struct work_struct *);
+static void netfs_rreq_clear_buffer(struct netfs_read_request *);
 static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);
 
 static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
@@ -51,6 +52,7 @@ static struct netfs_read_request *netfs_alloc_read_request(
 		rreq->inode	= file_inode(file);
 		rreq->i_size	= i_size_read(rreq->inode);
 		rreq->debug_id	= atomic_inc_return(&debug_ids);
+		xa_init(&rreq->buffer);
 		INIT_LIST_HEAD(&rreq->subrequests);
 		INIT_WORK(&rreq->work, netfs_rreq_work);
 		refcount_set(&rreq->usage, 1);
@@ -90,6 +92,7 @@ static void netfs_free_read_request(struct work_struct *work)
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
 	if (rreq->cache_resources.ops)
 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
+	netfs_rreq_clear_buffer(rreq);
 	kfree(rreq);
 	netfs_stat_d(&netfs_n_rh_rreq);
 }
@@ -727,7 +730,7 @@ netfs_rreq_prepare_read(struct netfs_read_request *rreq,
 	if (WARN_ON(subreq->len == 0))
 		source = NETFS_INVALID_READ;
 
-	iov_iter_xarray(&subreq->iter, READ, &rreq->mapping->i_pages,
+	iov_iter_xarray(&subreq->iter, READ, &rreq->buffer,
 			subreq->start, subreq->len);
 
 out:
@@ -838,6 +841,133 @@ static void netfs_rreq_expand(struct netfs_read_request *rreq,
 	}
 }
 
+/*
+ * Clear a read buffer: put each page that has XA_MARK_0 set, then destroy it.
+ */
+static void netfs_rreq_clear_buffer(struct netfs_read_request *rreq)
+{
+	struct page *page;
+	XA_STATE(xas, &rreq->buffer, 0);	/* Start the walk at index 0 */
+
+	rcu_read_lock();
+	xas_for_each_marked(&xas, page, ULONG_MAX, XA_MARK_0) {
+		put_page(page);	/* Marked pages are the ones that need freeing */
+	}
+	rcu_read_unlock();
+	xa_destroy(&rreq->buffer);
+}
+
+static int xa_insert_set_mark(struct xarray *xa, unsigned long index,
+			      void *entry, xa_mark_t mark, gfp_t gfp_mask)
+{
+	int ret;
+
+	xa_lock(xa);	/* Insert and mark under a single hold of the xarray lock */
+	ret = __xa_insert(xa, index, entry, gfp_mask);
+	if (ret == 0)	/* Only mark the slot if the insertion succeeded */
+		__xa_set_mark(xa, index, mark);
+	xa_unlock(xa);
+	return ret;	/* The result of __xa_insert(), passed through unchanged */
+}
+
+/*
+ * Allocate pages to fill the inclusive range [index, to] in the read request's
+ * buffer, marking each with XA_MARK_0 so that we know it needs freeing later.
+ * Returns 0 or a negative error; pages already added stay in the buffer.
+ */
+static int netfs_rreq_add_pages_to_buffer(struct netfs_read_request *rreq,
+					  pgoff_t index, pgoff_t to, gfp_t gfp_mask)
+{
+	struct page *page;
+	int ret;
+
+	if (to + 1 == index) /* Page range is inclusive */
+		return 0;
+
+	do {
+		page = __page_cache_alloc(gfp_mask);
+		if (!page)
+			return -ENOMEM;
+		page->index = index;
+		ret = xa_insert_set_mark(&rreq->buffer, index, page, XA_MARK_0,
+					 gfp_mask);
+		if (ret < 0) {
+			__free_page(page);
+			return ret;
+		}
+
+		index += thp_nr_pages(page);
+	} while (index <= to); /* "to" is inclusive, so fill that slot too */
+
+	return 0;
+}
+
+/*
+ * Set up a buffer into which data will be read or decrypted/decompressed.
+ * The pages to be read into are attached to this buffer and the gaps filled in
+ * to form a continuous region.  Returns 0 or a negative error code.
+ */
+static int netfs_rreq_set_up_buffer(struct netfs_read_request *rreq,
+				    struct readahead_control *ractl,
+				    struct page *keep,
+				    pgoff_t have_index, unsigned int have_pages)
+{
+	struct page *page;
+	gfp_t gfp_mask = readahead_gfp_mask(rreq->mapping);
+	unsigned int want_pages = have_pages;
+	pgoff_t want_index = have_index;
+	int ret;
+
+#if 0
+	want_index = round_down(want_index, 256 * 1024 / PAGE_SIZE);
+	want_pages += have_index - want_index;
+	want_pages = round_up(want_pages, 256 * 1024 / PAGE_SIZE);
+
+	kdebug("setup %lx-%lx -> %lx-%lx",
+	       have_index, have_index + have_pages - 1,
+	       want_index, want_index + want_pages - 1);
+#endif
+
+	ret = netfs_rreq_add_pages_to_buffer(rreq, want_index, have_index - 1,
+					     gfp_mask);
+	if (ret < 0)
+		return ret;
+	have_pages += have_index - want_index; /* Now counted from want_index */
+
+	ret = netfs_rreq_add_pages_to_buffer(rreq, want_index + have_pages,
+					     want_index + want_pages - 1,
+					     gfp_mask);
+	if (ret < 0)
+		return ret;
+
+	/* Transfer the pages proposed by the VM into the buffer along with
+	 * their page refs.  The locks will be dropped in netfs_rreq_unlock().
+	 */
+	if (ractl) {
+		while ((page = readahead_page(ractl))) {
+			if (page == keep) /* Caller keeps its own ref on this */
+				get_page(page);
+			ret = xa_insert_set_mark(&rreq->buffer, page->index, page,
+						 XA_MARK_0, gfp_mask);
+			if (ret < 0) {
+				if (page != keep) /* "keep" is left locked */
+					unlock_page(page);
+				put_page(page);
+				return ret;
+			}
+		}
+	} else {
+		get_page(keep); /* The buffer takes its own ref on the page */
+		ret = xa_insert_set_mark(&rreq->buffer, keep->index, keep,
+					 XA_MARK_0, gfp_mask);
+		if (ret < 0) {
+			put_page(keep);
+			return ret;
+		}
+	}
+	return 0;
+}
+
 /**
  * netfs_readahead - Helper to manage a read request
  * @ractl: The description of the readahead request
@@ -861,7 +991,6 @@ void netfs_readahead(struct readahead_control *ractl,
 		     void *netfs_priv)
 {
 	struct netfs_read_request *rreq;
-	struct page *page;
 	unsigned int debug_index = 0;
 	int ret;
 
@@ -889,6 +1018,12 @@ void netfs_readahead(struct readahead_control *ractl,
 
 	netfs_rreq_expand(rreq, ractl);
 
+	/* Set up the output buffer */
+	ret = netfs_rreq_set_up_buffer(rreq, ractl, NULL,
+				       readahead_index(ractl), readahead_count(ractl));
+	if (ret < 0)
+		goto cleanup_free;
+
 	atomic_set(&rreq->nr_rd_ops, 1);
 	do {
 		if (!netfs_rreq_submit_slice(rreq, &debug_index))
@@ -896,12 +1031,6 @@ void netfs_readahead(struct readahead_control *ractl,
 
 	} while (rreq->submitted < rreq->len);
 
-	/* Drop the refs on the pages here rather than in the cache or
-	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
-	 */
-	while ((page = readahead_page(ractl)))
-		put_page(page);
-
 	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
 	if (atomic_dec_and_test(&rreq->nr_rd_ops))
 		netfs_rreq_assess(rreq, false);
@@ -967,6 +1096,12 @@ int netfs_readpage(struct file *file,
 	netfs_stat(&netfs_n_rh_readpage);
 	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
 
+	/* Set up the output buffer */
+	ret = netfs_rreq_set_up_buffer(rreq, NULL, page,
+				       page_index(page), thp_nr_pages(page));
+	if (ret < 0)
+		goto out;
+
 	netfs_get_read_request(rreq);
 
 	atomic_set(&rreq->nr_rd_ops, 1);
@@ -1134,13 +1269,18 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
 	 */
 	ractl._nr_pages = thp_nr_pages(page);
 	netfs_rreq_expand(rreq, &ractl);
-	netfs_get_read_request(rreq);
 
-	/* We hold the page locks, so we can drop the references */
-	while ((xpage = readahead_page(&ractl)))
-		if (xpage != page)
-			put_page(xpage);
+	/* Set up the output buffer */
+	ret = netfs_rreq_set_up_buffer(rreq, &ractl, page,
+				       readahead_index(&ractl), readahead_count(&ractl));
+	if (ret < 0) {
+		while ((xpage = readahead_page(&ractl)))
+			if (xpage != page)
+				put_page(xpage);
+		goto error_put;
+	}
 
+	netfs_get_read_request(rreq);
 	atomic_set(&rreq->nr_rd_ops, 1);
 	do {
 		if (!netfs_rreq_submit_slice(rreq, &debug_index))
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 45d40c622205..815001fe7a76 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -138,6 +138,7 @@ struct netfs_read_request {
 	struct address_space	*mapping;	/* The mapping being accessed */
 	struct netfs_cache_resources cache_resources;
 	struct list_head	subrequests;	/* Requests to fetch I/O from disk or net */
+	struct xarray		buffer;		/* Decryption/decompression buffer */
 	void			*netfs_priv;	/* Private data for the netfs */
 	unsigned int		debug_id;
 	atomic_t		nr_rd_ops;	/* Number of read ops in progress */



  parent reply	other threads:[~2021-07-21 13:46 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-21 13:44 David Howells
2021-07-21 13:44 ` [RFC PATCH 01/12] afs: Sort out symlink reading David Howells
2021-07-21 16:20   ` Jeff Layton
2021-07-26  9:44   ` David Howells
2021-07-21 13:44 ` [RFC PATCH 02/12] netfs: Add an iov_iter to the read subreq for the network fs/cache to use David Howells
2021-07-21 17:16   ` Jeff Layton
2021-07-21 17:20   ` David Howells
2021-07-21 13:45 ` [RFC PATCH 03/12] netfs: Remove netfs_read_subrequest::transferred David Howells
2021-07-21 17:43   ` Jeff Layton
2021-07-21 18:54   ` David Howells
2021-07-21 19:00     ` Jeff Layton
2021-07-21 13:45 ` David Howells [this message]
2021-07-21 13:45 ` [RFC PATCH 05/12] netfs: Add a netfs inode context David Howells
2021-07-21 13:46 ` [RFC PATCH 06/12] netfs: Keep lists of pending, active, dirty and flushed regions David Howells
2021-07-21 13:46 ` [RFC PATCH 07/12] netfs: Initiate write request from a dirty region David Howells
2021-07-21 13:46 ` [RFC PATCH 08/12] netfs: Keep dirty mark for pages with more than one " David Howells
2021-07-21 13:46 ` [RFC PATCH 09/12] netfs: Send write request to multiple destinations David Howells
2021-07-21 13:46 ` [RFC PATCH 10/12] netfs: Do encryption in write preparatory phase David Howells
2021-07-21 13:47 ` [RFC PATCH 11/12] netfs: Put a list of regions in /proc/fs/netfs/regions David Howells
2021-07-21 13:47 ` [RFC PATCH 12/12] netfs: Export some read-request ref functions David Howells
2021-07-21 14:00 ` [RFC PATCH 00/12] netfs: Experimental write helpers, fscrypt and compression David Howells
2021-07-21 18:42 ` [RFC PATCH 13/12] netfs: Do copy-to-cache-on-read through VM writeback David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=162687512469.276387.15723958695928327041.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=anna.schumaker@netapp.com \
    --cc=asmadeus@codewreck.org \
    --cc=ceph-devel@vger.kernel.org \
    --cc=devel@lists.orangefs.org \
    --cc=dwysocha@redhat.com \
    --cc=hubcap@omnibond.com \
    --cc=jlayton@redhat.com \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=nspmangalore@gmail.com \
    --cc=sfrench@samba.org \
    --cc=torvalds@linux-foundation.org \
    --cc=v9fs-developer@lists.sourceforge.net \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).