Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: linux-fsdevel@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 09/14] iomap: Change iomap_write_begin calling convention
Date: Wed, 14 Oct 2020 09:47:44 -0700
Message-ID: <20201014164744.GK9832@magnolia> (raw)
In-Reply-To: <20201014030357.21898-10-willy@infradead.org>

On Wed, Oct 14, 2020 at 04:03:52AM +0100, Matthew Wilcox (Oracle) wrote:
> Pass (up to) the remaining length of the extent to iomap_write_begin()
> and have it return the number of bytes that will fit in the page.
> That lets us copy more bytes per call to iomap_write_begin() if the page
> cache has already allocated a THP (and will in future allow us to pass
> a hint to the page cache that it should try to allocate a larger page
> if there are none in the cache).
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 61 +++++++++++++++++++++++-------------------
>  1 file changed, 33 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 4ef02afaedc5..397795db3ce5 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -616,14 +616,14 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
>  	return submit_bio_wait(&bio);
>  }
>  
> -static int
> -__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
> -		struct page *page, struct iomap *srcmap)
> +static ssize_t __iomap_write_begin(struct inode *inode, loff_t pos,
> +		size_t len, int flags, struct page *page, struct iomap *srcmap)
>  {
>  	loff_t block_size = i_blocksize(inode);
>  	loff_t block_start = pos & ~(block_size - 1);
>  	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
> -	unsigned from = offset_in_page(pos), to = from + len;
> +	size_t from = offset_in_thp(page, pos);
> +	size_t to = from + len;
>  	size_t poff, plen;
>  
>  	if (PageUptodate(page))
> @@ -658,12 +658,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
>  	return 0;
>  }
>  
> -static int
> -iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
> -		struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
> +static ssize_t iomap_write_begin(struct inode *inode, loff_t pos, loff_t len,
> +		unsigned flags, struct page **pagep, struct iomap *iomap,

loff_t len?  You've been using size_t (ssize_t?) for lengths elsewhere,
this function can't return more than an ssize_t, and AFAIK MAX_RW_COUNT
will never be larger than 2GB, so I'm confused about the types here...?

That's mostly because my brain has been trained to think that if it sees
"size_t len" as an input parameter and an ssize_t return value, then the
return value is probably however much of @len we managed to process.

> +		struct iomap *srcmap)
>  {
>  	const struct iomap_page_ops *page_ops = iomap->page_ops;
>  	struct page *page;
> +	size_t offset;
>  	int status = 0;
>  
>  	BUG_ON(pos + len > iomap->offset + iomap->length);
> @@ -674,6 +675,8 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		return -EINTR;
>  
>  	if (page_ops && page_ops->page_prepare) {
> +		if (len > UINT_MAX)
> +			len = UINT_MAX;

I'm not especially familiar with page_prepare (since it's a gfs2 thing);
why do you clamp len to UINT_MAX here?

--D

>  		status = page_ops->page_prepare(inode, pos, len, iomap);
>  		if (status)
>  			return status;
> @@ -685,6 +688,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		status = -ENOMEM;
>  		goto out_no_page;
>  	}
> +	page = thp_head(page);
> +	offset = offset_in_thp(page, pos);
> +	if (len > thp_size(page) - offset)
> +		len = thp_size(page) - offset;
>  
>  	if (srcmap->type == IOMAP_INLINE)
>  		iomap_read_inline_data(inode, page, srcmap);
> @@ -694,11 +701,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
>  		status = __iomap_write_begin(inode, pos, len, flags, page,
>  				srcmap);
>  
> -	if (unlikely(status))
> +	if (status < 0)
>  		goto out_unlock;
>  
>  	*pagep = page;
> -	return 0;
> +	return len;
>  
>  out_unlock:
>  	unlock_page(page);
> @@ -854,8 +861,10 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  
>  		status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
>  				srcmap);
> -		if (unlikely(status))
> +		if (status < 0)
>  			break;
> +		/* We may be partway through a THP */
> +		offset = offset_in_thp(page, pos);
>  
>  		if (mapping_writably_mapped(inode->i_mapping))
>  			flush_dcache_page(page);
> @@ -915,7 +924,6 @@ static loff_t
>  iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		struct iomap *iomap, struct iomap *srcmap)
>  {
> -	long status = 0;
>  	loff_t written = 0;
>  
>  	/* don't bother with blocks that are not shared to start with */
> @@ -926,25 +934,24 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
>  		return length;
>  
>  	do {
> -		unsigned long offset = offset_in_page(pos);
> -		unsigned long bytes = min_t(loff_t, PAGE_SIZE - offset, length);
>  		struct page *page;
> +		ssize_t bytes;
>  
> -		status = iomap_write_begin(inode, pos, bytes,
> +		bytes = iomap_write_begin(inode, pos, length,
>  				IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
> -		if (unlikely(status))
> -			return status;
> +		if (bytes < 0)
> +			return bytes;
>  
> -		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
> +		bytes = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
>  				srcmap);
> -		if (WARN_ON_ONCE(status == 0))
> +		if (WARN_ON_ONCE(bytes == 0))
>  			return -EIO;
>  
>  		cond_resched();
>  
> -		pos += status;
> -		written += status;
> -		length -= status;
> +		pos += bytes;
> +		written += bytes;
> +		length -= bytes;
>  
>  		balance_dirty_pages_ratelimited(inode->i_mapping);
>  	} while (length);
> @@ -975,15 +982,13 @@ static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
>  		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct page *page;
> -	int status;
> -	unsigned offset = offset_in_page(pos);
> -	unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
> +	ssize_t bytes;
>  
> -	status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
> -	if (status)
> -		return status;
> +	bytes = iomap_write_begin(inode, pos, length, 0, &page, iomap, srcmap);
> +	if (bytes < 0)
> +		return bytes;
>  
> -	zero_user(page, offset, bytes);
> +	zero_user(page, offset_in_thp(page, pos), bytes);
>  	mark_page_accessed(page);
>  
>  	return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
> -- 
> 2.28.0
> 

  reply index

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-14  3:03 [PATCH 00/14] Transparent Huge Page support for XFS Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 01/14] fs: Support THPs in vfs_dedupe_file_range Matthew Wilcox (Oracle)
2020-10-14 16:12   ` Darrick J. Wong
2020-10-14 17:16     ` Matthew Wilcox
2020-10-14  3:03 ` [PATCH 02/14] fs: Make page_mkwrite_check_truncate thp-aware Matthew Wilcox (Oracle)
2020-10-14 16:17   ` Darrick J. Wong
2020-10-14 17:23     ` Matthew Wilcox
2020-10-14  3:03 ` [PATCH 03/14] iomap: Support THPs in BIO completion path Matthew Wilcox (Oracle)
2020-10-15  9:50   ` Christoph Hellwig
2020-10-14  3:03 ` [PATCH 04/14] iomap: Support THPs in iomap_adjust_read_range Matthew Wilcox (Oracle)
2020-10-15  9:50   ` Christoph Hellwig
2020-10-14  3:03 ` [PATCH 05/14] iomap: Support THPs in invalidatepage Matthew Wilcox (Oracle)
2020-10-14 16:33   ` Darrick J. Wong
2020-10-14 17:26     ` Matthew Wilcox
2020-10-14 20:00       ` Brian Foster
2020-10-14  3:03 ` [PATCH 06/14] iomap: Support THPs in iomap_is_partially_uptodate Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 07/14] iomap: Support THPs in readpage Matthew Wilcox (Oracle)
2020-10-14 16:39   ` Darrick J. Wong
2020-10-14 17:35     ` Matthew Wilcox
2020-10-14  3:03 ` [PATCH 08/14] iomap: Support THPs in readahead Matthew Wilcox (Oracle)
2020-10-15  9:52   ` Christoph Hellwig
2020-10-14  3:03 ` [PATCH 09/14] iomap: Change iomap_write_begin calling convention Matthew Wilcox (Oracle)
2020-10-14 16:47   ` Darrick J. Wong [this message]
2020-10-14 17:41     ` Matthew Wilcox
2020-10-14 18:08       ` Matthew Wilcox
2020-10-14  3:03 ` [PATCH 10/14] iomap: Handle THPs when writing to pages Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 11/14] iomap: Support THP writeback Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 12/14] iomap: Inline data shouldn't see THPs Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 13/14] iomap: Handle tail pages in iomap_page_mkwrite Matthew Wilcox (Oracle)
2020-10-14  3:03 ` [PATCH 14/14] xfs: Support THPs Matthew Wilcox (Oracle)
2020-10-14 16:51   ` Darrick J. Wong
2020-10-14 17:30     ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201014164744.GK9832@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git