All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@kernel.org>
To: David Howells <dhowells@redhat.com>, linux-cachefs@redhat.com
Cc: Anna Schumaker <anna.schumaker@netapp.com>,
	Steve French <sfrench@samba.org>,
	Dominique Martinet <asmadeus@codewreck.org>,
	David Wysochanski <dwysocha@redhat.com>,
	Ilya Dryomov <idryomov@gmail.com>,
	Jeffle Xu <jefflexu@linux.alibaba.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-afs@lists.infradead.org, linux-nfs@vger.kernel.org,
	linux-cifs@vger.kernel.org, ceph-devel@vger.kernel.org,
	v9fs-developer@lists.sourceforge.net,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 16/19] netfs: Split fs/netfs/read_helper.c
Date: Wed, 09 Mar 2022 15:27:07 -0500	[thread overview]
Message-ID: <76046e1df11bdf9269b64b7f6d691318c9a62dbf.camel@kernel.org> (raw)
In-Reply-To: <164678217075.1200972.5101072043126828757.stgit@warthog.procyon.org.uk>

On Tue, 2022-03-08 at 23:29 +0000, David Howells wrote:
> Split fs/netfs/read_helper.c into two pieces, one to deal with buffered
> writes and one to deal with the I/O mechanism.
> 

I think you mean buffered reads here?

> Changes
> =======
> ver #2)
>  - Add kdoc reference to new file.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: linux-cachefs@redhat.com
> 
> Link: https://lore.kernel.org/r/164623005586.3564931.6149556072728481767.stgit@warthog.procyon.org.uk/ # v1
> ---
> 
>  fs/netfs/Makefile        |    1 
>  fs/netfs/buffered_read.c |  428 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/netfs/io.c            |  418 ---------------------------------------------
>  3 files changed, 429 insertions(+), 418 deletions(-)
>  create mode 100644 fs/netfs/buffered_read.c
> 
> diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
> index 51ece4f7bc77..88b904532bc7 100644
> --- a/fs/netfs/Makefile
> +++ b/fs/netfs/Makefile
> @@ -1,6 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
>  netfs-y := \
> +	buffered_read.o \
>  	io.o \
>  	objects.o
>  
> diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
> new file mode 100644
> index 000000000000..09ba7097a970
> --- /dev/null
> +++ b/fs/netfs/buffered_read.c
> @@ -0,0 +1,428 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Network filesystem high-level buffered read support.
> + *
> + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
> + * Written by David Howells (dhowells@redhat.com)
> + */
> +
> +#include <linux/export.h>
> +#include <linux/task_io_accounting_ops.h>
> +#include "internal.h"
> +
> +/*
> + * Unlock the folios in a read operation.  We need to set PG_fscache on any
> + * folios we're going to write back before we unlock them.
> + */
> +void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
> +{
> +	struct netfs_io_subrequest *subreq;
> +	struct folio *folio;
> +	unsigned int iopos, account = 0;
> +	pgoff_t start_page = rreq->start / PAGE_SIZE;
> +	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> +	bool subreq_failed = false;
> +
> +	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> +
> +	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
> +		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
> +		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
> +			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
> +		}
> +	}
> +
> +	/* Walk through the pagecache and the I/O request lists simultaneously.
> +	 * We may have a mixture of cached and uncached sections and we only
> +	 * really want to write out the uncached sections.  This is slightly
> +	 * complicated by the possibility that we might have huge pages with a
> +	 * mixture inside.
> +	 */
> +	subreq = list_first_entry(&rreq->subrequests,
> +				  struct netfs_io_subrequest, rreq_link);
> +	iopos = 0;
> +	subreq_failed = (subreq->error < 0);
> +
> +	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
> +
> +	rcu_read_lock();
> +	xas_for_each(&xas, folio, last_page) {
> +		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
> +		unsigned int pgend = pgpos + folio_size(folio);
> +		bool pg_failed = false;
> +
> +		for (;;) {
> +			if (!subreq) {
> +				pg_failed = true;
> +				break;
> +			}
> +			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
> +				folio_start_fscache(folio);
> +			pg_failed |= subreq_failed;
> +			if (pgend < iopos + subreq->len)
> +				break;
> +
> +			account += subreq->transferred;
> +			iopos += subreq->len;
> +			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
> +				subreq = list_next_entry(subreq, rreq_link);
> +				subreq_failed = (subreq->error < 0);
> +			} else {
> +				subreq = NULL;
> +				subreq_failed = false;
> +			}
> +			if (pgend == iopos)
> +				break;
> +		}
> +
> +		if (!pg_failed) {
> +			flush_dcache_folio(folio);
> +			folio_mark_uptodate(folio);
> +		}
> +
> +		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
> +			if (folio_index(folio) == rreq->no_unlock_folio &&
> +			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
> +				_debug("no unlock");
> +			else
> +				folio_unlock(folio);
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	task_io_account_read(account);
> +	if (rreq->netfs_ops->done)
> +		rreq->netfs_ops->done(rreq);
> +}
> +
> +static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
> +					 loff_t *_start, size_t *_len, loff_t i_size)
> +{
> +	struct netfs_cache_resources *cres = &rreq->cache_resources;
> +
> +	if (cres->ops && cres->ops->expand_readahead)
> +		cres->ops->expand_readahead(cres, _start, _len, i_size);
> +}
> +
> +static void netfs_rreq_expand(struct netfs_io_request *rreq,
> +			      struct readahead_control *ractl)
> +{
> +	/* Give the cache a chance to change the request parameters.  The
> +	 * resultant request must contain the original region.
> +	 */
> +	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
> +
> +	/* Give the netfs a chance to change the request parameters.  The
> +	 * resultant request must contain the original region.
> +	 */
> +	if (rreq->netfs_ops->expand_readahead)
> +		rreq->netfs_ops->expand_readahead(rreq);
> +
> +	/* Expand the request if the cache wants it to start earlier.  Note
> +	 * that the expansion may get further extended if the VM wishes to
> +	 * insert THPs and the preferred start and/or end wind up in the middle
> +	 * of THPs.
> +	 *
> +	 * If this is the case, however, the THP size should be an integer
> +	 * multiple of the cache granule size, so we get a whole number of
> +	 * granules to deal with.
> +	 */
> +	if (rreq->start  != readahead_pos(ractl) ||
> +	    rreq->len != readahead_length(ractl)) {
> +		readahead_expand(ractl, rreq->start, rreq->len);
> +		rreq->start  = readahead_pos(ractl);
> +		rreq->len = readahead_length(ractl);
> +
> +		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> +				 netfs_read_trace_expanded);
> +	}
> +}
> +
> +/**
> + * netfs_readahead - Helper to manage a read request
> + * @ractl: The description of the readahead request
> + *
> + * Fulfil a readahead request by drawing data from the cache if possible, or
> + * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
> + * requests from different sources will get munged together.  If necessary, the
> + * readahead window can be expanded in either direction to a more convenient
> + * alignment for RPC efficiency or to make storage in the cache feasible.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +void netfs_readahead(struct readahead_control *ractl)
> +{
> +	struct netfs_io_request *rreq;
> +	struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
> +	int ret;
> +
> +	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
> +
> +	if (readahead_count(ractl) == 0)
> +		return;
> +
> +	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
> +				   readahead_pos(ractl),
> +				   readahead_length(ractl),
> +				   NETFS_READAHEAD);
> +	if (IS_ERR(rreq))
> +		return;
> +
> +	if (ctx->ops->begin_cache_operation) {
> +		ret = ctx->ops->begin_cache_operation(rreq);
> +		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +			goto cleanup_free;
> +	}
> +
> +	netfs_stat(&netfs_n_rh_readahead);
> +	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> +			 netfs_read_trace_readahead);
> +
> +	netfs_rreq_expand(rreq, ractl);
> +
> +	/* Drop the refs on the folios here rather than in the cache or
> +	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
> +	 */
> +	while (readahead_folio(ractl))
> +		;
> +
> +	netfs_begin_read(rreq, false);
> +	return;
> +
> +cleanup_free:
> +	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> +	return;
> +}
> +EXPORT_SYMBOL(netfs_readahead);
> +
> +/**
> + * netfs_readpage - Helper to manage a readpage request
> + * @file: The file to read from
> + * @subpage: A subpage of the folio to read
> + *
> + * Fulfil a readpage request by drawing data from the cache if possible, or the
> + * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
> + * from different sources will get munged together.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +int netfs_readpage(struct file *file, struct page *subpage)
> +{
> +	struct folio *folio = page_folio(subpage);
> +	struct address_space *mapping = folio_file_mapping(folio);
> +	struct netfs_io_request *rreq;
> +	struct netfs_i_context *ctx = netfs_i_context(mapping->host);
> +	int ret;
> +
> +	_enter("%lx", folio_index(folio));
> +
> +	rreq = netfs_alloc_request(mapping, file,
> +				   folio_file_pos(folio), folio_size(folio),
> +				   NETFS_READPAGE);
> +	if (IS_ERR(rreq)) {
> +		ret = PTR_ERR(rreq);
> +		goto alloc_error;
> +	}
> +
> +	if (ctx->ops->begin_cache_operation) {
> +		ret = ctx->ops->begin_cache_operation(rreq);
> +		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +			goto discard;
> +	}
> +
> +	netfs_stat(&netfs_n_rh_readpage);
> +	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
> +	return netfs_begin_read(rreq, true);
> +
> +discard:
> +	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
> +alloc_error:
> +	folio_unlock(folio);
> +	return ret;
> +}
> +EXPORT_SYMBOL(netfs_readpage);
> +
> +/*
> + * Prepare a folio for writing without reading first
> + * @folio: The folio being prepared
> + * @pos: starting position for the write
> + * @len: length of write
> + * @always_fill: T if the folio should always be completely filled/cleared
> + *
> + * In some cases, write_begin doesn't need to read at all:
> + * - full folio write
> + * - write that lies in a folio that is completely beyond EOF
> + * - write that covers the folio from start to EOF or beyond it
> + *
> + * If any of these criteria are met, then zero out the unwritten parts
> + * of the folio and return true. Otherwise, return false.
> + */
> +static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
> +				 bool always_fill)
> +{
> +	struct inode *inode = folio_inode(folio);
> +	loff_t i_size = i_size_read(inode);
> +	size_t offset = offset_in_folio(folio, pos);
> +	size_t plen = folio_size(folio);
> +
> +	if (unlikely(always_fill)) {
> +		if (pos - offset + len <= i_size)
> +			return false; /* Page entirely before EOF */
> +		zero_user_segment(&folio->page, 0, plen);
> +		folio_mark_uptodate(folio);
> +		return true;
> +	}
> +
> +	/* Full folio write */
> +	if (offset == 0 && len >= plen)
> +		return true;
> +
> +	/* Page entirely beyond the end of the file */
> +	if (pos - offset >= i_size)
> +		goto zero_out;
> +
> +	/* Write that covers from the start of the folio to EOF or beyond */
> +	if (offset == 0 && (pos + len) >= i_size)
> +		goto zero_out;
> +
> +	return false;
> +zero_out:
> +	zero_user_segments(&folio->page, 0, offset, offset + len, len);
> +	return true;
> +}
> +
> +/**
> + * netfs_write_begin - Helper to prepare for writing
> + * @file: The file to read from
> + * @mapping: The mapping to read from
> + * @pos: File position at which the write will begin
> + * @len: The length of the write (may extend beyond the end of the folio chosen)
> + * @aop_flags: AOP_* flags
> + * @_folio: Where to put the resultant folio
> + * @_fsdata: Place for the netfs to store a cookie
> + *
> + * Pre-read data for a write-begin request by drawing data from the cache if
> + * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
> + * Multiple I/O requests from different sources will get munged together.  If
> + * necessary, the readahead window can be expanded in either direction to a
> + * more convenient alignment for RPC efficiency or to make storage in the cache
> + * feasible.
> + *
> + * The calling netfs must provide a table of operations, only one of which,
> + * issue_op, is mandatory.
> + *
> + * The check_write_begin() operation can be provided to check for and flush
> + * conflicting writes once the folio is grabbed and locked.  It is passed a
> + * pointer to the fsdata cookie that gets returned to the VM to be passed to
> + * write_end.  It is permitted to sleep.  It should return 0 if the request
> + * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
> + * be regot; or return an error.
> + *
> + * The calling netfs must initialise a netfs context contiguous to the vfs
> + * inode before calling this.
> + *
> + * This is usable whether or not caching is enabled.
> + */
> +int netfs_write_begin(struct file *file, struct address_space *mapping,
> +		      loff_t pos, unsigned int len, unsigned int aop_flags,
> +		      struct folio **_folio, void **_fsdata)
> +{
> +	struct netfs_io_request *rreq;
> +	struct netfs_i_context *ctx = netfs_i_context(file_inode(file ));
> +	struct folio *folio;
> +	unsigned int fgp_flags;
> +	pgoff_t index = pos >> PAGE_SHIFT;
> +	int ret;
> +
> +	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
> +
> +retry:
> +	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
> +	if (aop_flags & AOP_FLAG_NOFS)
> +		fgp_flags |= FGP_NOFS;
> +	folio = __filemap_get_folio(mapping, index, fgp_flags,
> +				    mapping_gfp_mask(mapping));
> +	if (!folio)
> +		return -ENOMEM;
> +
> +	if (ctx->ops->check_write_begin) {
> +		/* Allow the netfs (eg. ceph) to flush conflicts. */
> +		ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
> +		if (ret < 0) {
> +			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
> +			if (ret == -EAGAIN)
> +				goto retry;
> +			goto error;
> +		}
> +	}
> +
> +	if (folio_test_uptodate(folio))
> +		goto have_folio;
> +
> +	/* If the page is beyond the EOF, we want to clear it - unless it's
> +	 * within the cache granule containing the EOF, in which case we need
> +	 * to preload the granule.
> +	 */
> +	if (!netfs_is_cache_enabled(ctx) &&
> +	    netfs_skip_folio_read(folio, pos, len, false)) {
> +		netfs_stat(&netfs_n_rh_write_zskip);
> +		goto have_folio_no_wait;
> +	}
> +
> +	rreq = netfs_alloc_request(mapping, file,
> +				   folio_file_pos(folio), folio_size(folio),
> +				   NETFS_READ_FOR_WRITE);
> +	if (IS_ERR(rreq)) {
> +		ret = PTR_ERR(rreq);
> +		goto error;
> +	}
> +	rreq->no_unlock_folio	= folio_index(folio);
> +	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
> +
> +	if (ctx->ops->begin_cache_operation) {
> +		ret = ctx->ops->begin_cache_operation(rreq);
> +		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> +			goto error_put;
> +	}
> +
> +	netfs_stat(&netfs_n_rh_write_begin);
> +	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
> +
> +	/* Expand the request to meet caching requirements and download
> +	 * preferences.
> +	 */
> +	ractl._nr_pages = folio_nr_pages(folio);
> +	netfs_rreq_expand(rreq, &ractl);
> +
> +	/* We hold the folio locks, so we can drop the references */
> +	folio_get(folio);
> +	while (readahead_folio(&ractl))
> +		;
> +
> +	ret = netfs_begin_read(rreq, true);
> +	if (ret < 0)
> +		goto error;
> +
> +have_folio:
> +	ret = folio_wait_fscache_killable(folio);
> +	if (ret < 0)
> +		goto error;
> +have_folio_no_wait:
> +	*_folio = folio;
> +	_leave(" = 0");
> +	return 0;
> +
> +error_put:
> +	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> +error:
> +	folio_unlock(folio);
> +	folio_put(folio);
> +	_leave(" = %d", ret);
> +	return ret;
> +}
> +EXPORT_SYMBOL(netfs_write_begin);
> diff --git a/fs/netfs/io.c b/fs/netfs/io.c
> index 058a534ba917..1fe9706c58a5 100644
> --- a/fs/netfs/io.c
> +++ b/fs/netfs/io.c
> @@ -246,91 +246,6 @@ static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
>  		BUG();
>  }
>  
> -/*
> - * Unlock the folios in a read operation.  We need to set PG_fscache on any
> - * folios we're going to write back before we unlock them.
> - */
> -void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
> -{
> -	struct netfs_io_subrequest *subreq;
> -	struct folio *folio;
> -	unsigned int iopos, account = 0;
> -	pgoff_t start_page = rreq->start / PAGE_SIZE;
> -	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
> -	bool subreq_failed = false;
> -
> -	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
> -
> -	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
> -		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
> -		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
> -			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
> -		}
> -	}
> -
> -	/* Walk through the pagecache and the I/O request lists simultaneously.
> -	 * We may have a mixture of cached and uncached sections and we only
> -	 * really want to write out the uncached sections.  This is slightly
> -	 * complicated by the possibility that we might have huge pages with a
> -	 * mixture inside.
> -	 */
> -	subreq = list_first_entry(&rreq->subrequests,
> -				  struct netfs_io_subrequest, rreq_link);
> -	iopos = 0;
> -	subreq_failed = (subreq->error < 0);
> -
> -	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
> -
> -	rcu_read_lock();
> -	xas_for_each(&xas, folio, last_page) {
> -		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
> -		unsigned int pgend = pgpos + folio_size(folio);
> -		bool pg_failed = false;
> -
> -		for (;;) {
> -			if (!subreq) {
> -				pg_failed = true;
> -				break;
> -			}
> -			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
> -				folio_start_fscache(folio);
> -			pg_failed |= subreq_failed;
> -			if (pgend < iopos + subreq->len)
> -				break;
> -
> -			account += subreq->transferred;
> -			iopos += subreq->len;
> -			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
> -				subreq = list_next_entry(subreq, rreq_link);
> -				subreq_failed = (subreq->error < 0);
> -			} else {
> -				subreq = NULL;
> -				subreq_failed = false;
> -			}
> -			if (pgend == iopos)
> -				break;
> -		}
> -
> -		if (!pg_failed) {
> -			flush_dcache_folio(folio);
> -			folio_mark_uptodate(folio);
> -		}
> -
> -		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
> -			if (folio_index(folio) == rreq->no_unlock_folio &&
> -			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
> -				_debug("no unlock");
> -			else
> -				folio_unlock(folio);
> -		}
> -	}
> -	rcu_read_unlock();
> -
> -	task_io_account_read(account);
> -	if (rreq->netfs_ops->done)
> -		rreq->netfs_ops->done(rreq);
> -}
> -
>  /*
>   * Handle a short read.
>   */
> @@ -750,336 +665,3 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
>  	}
>  	return ret;
>  }
> -
> -static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
> -					 loff_t *_start, size_t *_len, loff_t i_size)
> -{
> -	struct netfs_cache_resources *cres = &rreq->cache_resources;
> -
> -	if (cres->ops && cres->ops->expand_readahead)
> -		cres->ops->expand_readahead(cres, _start, _len, i_size);
> -}
> -
> -static void netfs_rreq_expand(struct netfs_io_request *rreq,
> -			      struct readahead_control *ractl)
> -{
> -	/* Give the cache a chance to change the request parameters.  The
> -	 * resultant request must contain the original region.
> -	 */
> -	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
> -
> -	/* Give the netfs a chance to change the request parameters.  The
> -	 * resultant request must contain the original region.
> -	 */
> -	if (rreq->netfs_ops->expand_readahead)
> -		rreq->netfs_ops->expand_readahead(rreq);
> -
> -	/* Expand the request if the cache wants it to start earlier.  Note
> -	 * that the expansion may get further extended if the VM wishes to
> -	 * insert THPs and the preferred start and/or end wind up in the middle
> -	 * of THPs.
> -	 *
> -	 * If this is the case, however, the THP size should be an integer
> -	 * multiple of the cache granule size, so we get a whole number of
> -	 * granules to deal with.
> -	 */
> -	if (rreq->start  != readahead_pos(ractl) ||
> -	    rreq->len != readahead_length(ractl)) {
> -		readahead_expand(ractl, rreq->start, rreq->len);
> -		rreq->start  = readahead_pos(ractl);
> -		rreq->len = readahead_length(ractl);
> -
> -		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> -				 netfs_read_trace_expanded);
> -	}
> -}
> -
> -/**
> - * netfs_readahead - Helper to manage a read request
> - * @ractl: The description of the readahead request
> - *
> - * Fulfil a readahead request by drawing data from the cache if possible, or
> - * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
> - * requests from different sources will get munged together.  If necessary, the
> - * readahead window can be expanded in either direction to a more convenient
> - * alighment for RPC efficiency or to make storage in the cache feasible.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -void netfs_readahead(struct readahead_control *ractl)
> -{
> -	struct netfs_io_request *rreq;
> -	struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
> -	int ret;
> -
> -	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
> -
> -	if (readahead_count(ractl) == 0)
> -		return;
> -
> -	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
> -				   readahead_pos(ractl),
> -				   readahead_length(ractl),
> -				   NETFS_READAHEAD);
> -	if (IS_ERR(rreq))
> -		return;
> -
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto cleanup_free;
> -	}
> -
> -	netfs_stat(&netfs_n_rh_readahead);
> -	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
> -			 netfs_read_trace_readahead);
> -
> -	netfs_rreq_expand(rreq, ractl);
> -
> -	/* Drop the refs on the folios here rather than in the cache or
> -	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
> -	 */
> -	while (readahead_folio(ractl))
> -		;
> -
> -	netfs_begin_read(rreq, false);
> -	return;
> -
> -cleanup_free:
> -	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> -	return;
> -}
> -EXPORT_SYMBOL(netfs_readahead);
> -
> -/**
> - * netfs_readpage - Helper to manage a readpage request
> - * @file: The file to read from
> - * @subpage: A subpage of the folio to read
> - *
> - * Fulfil a readpage request by drawing data from the cache if possible, or the
> - * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
> - * from different sources will get munged together.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -int netfs_readpage(struct file *file, struct page *subpage)
> -{
> -	struct folio *folio = page_folio(subpage);
> -	struct address_space *mapping = folio->mapping;
> -	struct netfs_io_request *rreq;
> -	struct netfs_i_context *ctx = netfs_i_context(mapping->host);
> -	int ret;
> -
> -	_enter("%lx", folio_index(folio));
> -
> -	rreq = netfs_alloc_request(mapping, file,
> -				   folio_file_pos(folio), folio_size(folio),
> -				   NETFS_READPAGE);
> -	if (IS_ERR(rreq)) {
> -		ret = PTR_ERR(rreq);
> -		goto alloc_error;
> -	}
> -
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto discard;
> -	}
> -
> -	netfs_stat(&netfs_n_rh_readpage);
> -	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
> -	return netfs_begin_read(rreq, true);
> -
> -discard:
> -	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
> -alloc_error:
> -	folio_unlock(folio);
> -	return ret;
> -}
> -EXPORT_SYMBOL(netfs_readpage);
> -
> -/*
> - * Prepare a folio for writing without reading first
> - * @folio: The folio being prepared
> - * @pos: starting position for the write
> - * @len: length of write
> - * @always_fill: T if the folio should always be completely filled/cleared
> - *
> - * In some cases, write_begin doesn't need to read at all:
> - * - full folio write
> - * - write that lies in a folio that is completely beyond EOF
> - * - write that covers the folio from start to EOF or beyond it
> - *
> - * If any of these criteria are met, then zero out the unwritten parts
> - * of the folio and return true. Otherwise, return false.
> - */
> -static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
> -				 bool always_fill)
> -{
> -	struct inode *inode = folio_inode(folio);
> -	loff_t i_size = i_size_read(inode);
> -	size_t offset = offset_in_folio(folio, pos);
> -	size_t plen = folio_size(folio);
> -
> -	if (unlikely(always_fill)) {
> -		if (pos - offset + len <= i_size)
> -			return false; /* Page entirely before EOF */
> -		zero_user_segment(&folio->page, 0, plen);
> -		folio_mark_uptodate(folio);
> -		return true;
> -	}
> -
> -	/* Full folio write */
> -	if (offset == 0 && len >= plen)
> -		return true;
> -
> -	/* Page entirely beyond the end of the file */
> -	if (pos - offset >= i_size)
> -		goto zero_out;
> -
> -	/* Write that covers from the start of the folio to EOF or beyond */
> -	if (offset == 0 && (pos + len) >= i_size)
> -		goto zero_out;
> -
> -	return false;
> -zero_out:
> -	zero_user_segments(&folio->page, 0, offset, offset + len, len);
> -	return true;
> -}
> -
> -/**
> - * netfs_write_begin - Helper to prepare for writing
> - * @file: The file to read from
> - * @mapping: The mapping to read from
> - * @pos: File position at which the write will begin
> - * @len: The length of the write (may extend beyond the end of the folio chosen)
> - * @aop_flags: AOP_* flags
> - * @_folio: Where to put the resultant folio
> - * @_fsdata: Place for the netfs to store a cookie
> - *
> - * Pre-read data for a write-begin request by drawing data from the cache if
> - * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
> - * Multiple I/O requests from different sources will get munged together.  If
> - * necessary, the readahead window can be expanded in either direction to a
> - * more convenient alighment for RPC efficiency or to make storage in the cache
> - * feasible.
> - *
> - * The calling netfs must provide a table of operations, only one of which,
> - * issue_op, is mandatory.
> - *
> - * The check_write_begin() operation can be provided to check for and flush
> - * conflicting writes once the folio is grabbed and locked.  It is passed a
> - * pointer to the fsdata cookie that gets returned to the VM to be passed to
> - * write_end.  It is permitted to sleep.  It should return 0 if the request
> - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
> - * be regot; or return an error.
> - *
> - * The calling netfs must initialise a netfs context contiguous to the vfs
> - * inode before calling this.
> - *
> - * This is usable whether or not caching is enabled.
> - */
> -int netfs_write_begin(struct file *file, struct address_space *mapping,
> -		      loff_t pos, unsigned int len, unsigned int aop_flags,
> -		      struct folio **_folio, void **_fsdata)
> -{
> -	struct netfs_io_request *rreq;
> -	struct netfs_i_context *ctx = netfs_i_context(file_inode(file ));
> -	struct folio *folio;
> -	unsigned int fgp_flags;
> -	pgoff_t index = pos >> PAGE_SHIFT;
> -	int ret;
> -
> -	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
> -
> -retry:
> -	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
> -	if (aop_flags & AOP_FLAG_NOFS)
> -		fgp_flags |= FGP_NOFS;
> -	folio = __filemap_get_folio(mapping, index, fgp_flags,
> -				    mapping_gfp_mask(mapping));
> -	if (!folio)
> -		return -ENOMEM;
> -
> -	if (ctx->ops->check_write_begin) {
> -		/* Allow the netfs (eg. ceph) to flush conflicts. */
> -		ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
> -		if (ret < 0) {
> -			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
> -			if (ret == -EAGAIN)
> -				goto retry;
> -			goto error;
> -		}
> -	}
> -
> -	if (folio_test_uptodate(folio))
> -		goto have_folio;
> -
> -	/* If the page is beyond the EOF, we want to clear it - unless it's
> -	 * within the cache granule containing the EOF, in which case we need
> -	 * to preload the granule.
> -	 */
> -	if (!netfs_is_cache_enabled(ctx) &&
> -	    netfs_skip_folio_read(folio, pos, len, false)) {
> -		netfs_stat(&netfs_n_rh_write_zskip);
> -		goto have_folio_no_wait;
> -	}
> -
> -	rreq = netfs_alloc_request(mapping, file,
> -				   folio_file_pos(folio), folio_size(folio),
> -				   NETFS_READ_FOR_WRITE);
> -	if (IS_ERR(rreq)) {
> -		ret = PTR_ERR(rreq);
> -		goto error;
> -	}
> -	rreq->no_unlock_folio	= folio_index(folio);
> -	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
> -
> -	if (ctx->ops->begin_cache_operation) {
> -		ret = ctx->ops->begin_cache_operation(rreq);
> -		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
> -			goto error_put;
> -	}
> -
> -	netfs_stat(&netfs_n_rh_write_begin);
> -	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
> -
> -	/* Expand the request to meet caching requirements and download
> -	 * preferences.
> -	 */
> -	ractl._nr_pages = folio_nr_pages(folio);
> -	netfs_rreq_expand(rreq, &ractl);
> -
> -	/* We hold the folio locks, so we can drop the references */
> -	folio_get(folio);
> -	while (readahead_folio(&ractl))
> -		;
> -
> -	ret = netfs_begin_read(rreq, true);
> -	if (ret < 0)
> -		goto error;
> -
> -have_folio:
> -	ret = folio_wait_fscache_killable(folio);
> -	if (ret < 0)
> -		goto error;
> -have_folio_no_wait:
> -	*_folio = folio;
> -	_leave(" = 0");
> -	return 0;
> -
> -error_put:
> -	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
> -error:
> -	folio_unlock(folio);
> -	folio_put(folio);
> -	_leave(" = %d", ret);
> -	return ret;
> -}
> -EXPORT_SYMBOL(netfs_write_begin);
> 
> 

Patch itself is fine though.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

  reply	other threads:[~2022-03-09 20:27 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-08 23:24 [PATCH v2 00/19] netfs: Prep for write helpers David Howells
2022-03-08 23:25 ` [PATCH v2 01/19] fscache: export fscache_end_operation() David Howells
2022-03-09 15:26   ` Jeff Layton
2022-03-10  1:40     ` JeffleXu
2022-03-08 23:25 ` [PATCH v2 02/19] netfs: Generate enums from trace symbol mapping lists David Howells
2022-03-09 15:30   ` Jeff Layton
2022-03-09 15:49   ` David Howells
2022-03-09 18:39     ` Jeff Layton
2022-03-08 23:25 ` [PATCH v2 03/19] netfs: Rename netfs_read_*request to netfs_io_*request David Howells
2022-03-09 15:31   ` Jeff Layton
2022-03-08 23:26 ` [PATCH v2 04/19] netfs: Finish off rename of netfs_read_request to netfs_io_request David Howells
2022-03-09 15:34   ` Jeff Layton
2022-03-08 23:26 ` [PATCH v2 05/19] netfs: Split netfs_io_* object handling out David Howells
2022-03-09 15:44   ` Jeff Layton
2022-03-08 23:26 ` [PATCH v2 06/19] netfs: Adjust the netfs_rreq tracepoint slightly David Howells
2022-03-09 15:45   ` Jeff Layton
2022-03-08 23:26 ` [PATCH v2 07/19] netfs: Trace refcounting on the netfs_io_request struct David Howells
2022-03-08 23:27 ` [PATCH v2 08/19] netfs: Trace refcounting on the netfs_io_subrequest struct David Howells
2022-03-08 23:27 ` [PATCH v2 09/19] netfs: Adjust the netfs_failure tracepoint to indicate non-subreq lines David Howells
2022-03-08 23:28 ` [PATCH v2 10/19] netfs: Refactor arguments for netfs_alloc_read_request David Howells
2022-03-08 23:28 ` [PATCH v2 11/19] netfs: Change ->init_request() to return an error code David Howells
2022-03-09 16:52   ` Jeff Layton
2022-03-08 23:28 ` [PATCH v2 12/19] netfs: Add a netfs inode context David Howells
2022-03-09 18:22   ` Jeff Layton
2022-03-09 19:23   ` David Howells
2022-03-09 19:46     ` Jeff Layton
2022-03-08 23:29 ` [PATCH v2 13/19] netfs: Add a function to consolidate beginning a read David Howells
2022-03-09 19:26   ` Jeff Layton
2022-03-09 22:08   ` David Howells
2022-03-08 23:29 ` [PATCH v2 14/19] netfs: Prepare to split read_helper.c David Howells
2022-03-08 23:29 ` [PATCH v2 15/19] netfs: Rename read_helper.c to io.c David Howells
2022-03-08 23:29 ` [PATCH v2 16/19] netfs: Split fs/netfs/read_helper.c David Howells
2022-03-09 20:27   ` Jeff Layton [this message]
2022-03-08 23:29 ` [PATCH v2 17/19] netfs: Split some core bits out into their own file David Howells
2022-03-08 23:29 ` [PATCH v2 18/19] netfs: Keep track of the actual remote file size David Howells
2022-03-09 20:45   ` Jeff Layton
2022-03-09 22:27   ` David Howells
2022-03-08 23:30 ` [PATCH v2 19/19] afs: Maintain netfs_i_context::remote_i_size David Howells
2022-03-09 21:12   ` Jeff Layton
2022-03-09 22:35   ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=76046e1df11bdf9269b64b7f6d691318c9a62dbf.camel@kernel.org \
    --to=jlayton@kernel.org \
    --cc=anna.schumaker@netapp.com \
    --cc=asmadeus@codewreck.org \
    --cc=ceph-devel@vger.kernel.org \
    --cc=dhowells@redhat.com \
    --cc=dwysocha@redhat.com \
    --cc=idryomov@gmail.com \
    --cc=jefflexu@linux.alibaba.com \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=sfrench@samba.org \
    --cc=torvalds@linux-foundation.org \
    --cc=v9fs-developer@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.