From: Anand Jain <anand.jain@oracle.com>
To: Qu Wenruo <wqu@suse.com>, linux-btrfs@vger.kernel.org
Subject: Re: [PATCH v3 04/13] btrfs: refactor how we iterate ordered extent in btrfs_invalidatepage()
Date: Fri, 2 Apr 2021 09:15:50 +0800
Message-ID: <09d9c11c-02ff-8109-5244-021fac5c3626@oracle.com>
In-Reply-To: <20210325071445.90896-5-wqu@suse.com>

On 25/03/2021 15:14, Qu Wenruo wrote:
> In btrfs_invalidatepage(), we need to iterate through all ordered
> extents and finish them.
> 
> This involved a loop to exhaust all ordered extents, but that loop is
> implemented using again: label and goto.
> 
> Refactor the code by:
> - Use a while() loop

Just an observation.
At a minimum, the while loop does two iterations before breaking,
whereas the label-and-goto version can finish without reaching the
goto at all for the same value of %length. So the label-and-goto
approach is still faster.
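
For reference, a pseudo-C sketch of the two control-flow shapes being
compared, distilled from the diff below (lookup(), process() and
ordered_end() are hypothetical stand-ins for the real calls, with
ordered_end() returning the exclusive end of the ordered extent):

	/* Old shape: label + goto */
	start = page_start;
again:
	ordered = lookup(start, page_end - start + 1);
	if (ordered) {
		process(ordered);
		start = min(page_end, ordered_end(ordered) - 1) + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * New shape: while loop, which exits either on the range check
	 * or on a lookup that returns NULL.
	 */
	cur = page_start;
	while (cur < page_end) {
		ordered = lookup(cur, page_end - cur + 1);
		if (!ordered)
			break;
		process(ordered);
		cur = ordered_end(ordered);
	}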

A question below.

> - Extract the code to finish/dec an ordered extent into its own function
>    The new function, invalidate_ordered_extent(), will handle the
>    extent locking, extent bit update, and to finish/dec ordered extent.
> 
> In fact, for regular sectorsize == PAGE_SIZE case, there can only be at
> most one ordered extent for one page, thus the code is from ancient
> subpage preparation patchset.
> 
> But there is a bug hidden inside the ordered extent finish/dec part.
> 
> This patch will remove the ability to handle multiple ordered extent,
> and add extra ASSERT() to make sure for regular sectorsize we won't have
> anything wrong.
> 
> For the proper subpage support, it will be added in later patches.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/inode.c | 122 +++++++++++++++++++++++++++++------------------
>   1 file changed, 75 insertions(+), 47 deletions(-)
> 
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index d777f67d366b..99dcadd31870 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8355,17 +8355,72 @@ static int btrfs_migratepage(struct address_space *mapping,
>   }
>   #endif
>   
> +/*
> + * Helper to finish/dec one ordered extent for btrfs_invalidatepage().
> + *
> + * Return true if the ordered extent is finished.
> + * Return false otherwise
> + */
> +static bool invalidate_ordered_extent(struct btrfs_inode *inode,
> +				      struct btrfs_ordered_extent *ordered,
> +				      struct page *page,
> +				      struct extent_state **cached_state,
> +				      bool inode_evicting)
> +{
> +	u64 start = page_offset(page);
> +	u64 end = page_offset(page) + PAGE_SIZE - 1;
> +	u32 len = PAGE_SIZE;
> +	bool completed_ordered = false;
> +
> +	/*
> +	 * For regular sectorsize == PAGE_SIZE, if the ordered extent covers
> +	 * the page, then it must cover the full page.
> +	 */
> +	ASSERT(ordered->file_offset <= start &&
> +	       ordered->file_offset + ordered->num_bytes > end);
> +	/*
> +	 * IO on this page will never be started, so we need to account
> +	 * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
> +	 * here, must leave that up for the ordered extent completion.
> +	 */
> +	if (!inode_evicting)
> +		clear_extent_bit(&inode->io_tree, start, end,
> +				 EXTENT_DELALLOC | EXTENT_LOCKED |
> +				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 0,
> +				 cached_state);
> +	/*
> +	 * Whoever cleared the private bit is responsible for the
> +	 * finish_ordered_io
> +	 */
> +	if (TestClearPagePrivate2(page)) {
> +		spin_lock_irq(&inode->ordered_tree.lock);
> +		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
> +		ordered->truncated_len = min(ordered->truncated_len,
> +					     start - ordered->file_offset);
> +		spin_unlock_irq(&inode->ordered_tree.lock);
> +
> +		if (btrfs_dec_test_ordered_pending(inode, &ordered, start, len, 1)) {
> +			btrfs_finish_ordered_io(ordered);
> +			completed_ordered = true;
> +		}
> +	}
> +	btrfs_put_ordered_extent(ordered);
> +	if (!inode_evicting) {
> +		*cached_state = NULL;
> +		lock_extent_bits(&inode->io_tree, start, end, cached_state);
> +	}
> +	return completed_ordered;
> +}
> +
>   static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>   				 unsigned int length)
>   {
>   	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
>   	struct extent_io_tree *tree = &inode->io_tree;
> -	struct btrfs_ordered_extent *ordered;
>   	struct extent_state *cached_state = NULL;
>   	u64 page_start = page_offset(page);
>   	u64 page_end = page_start + PAGE_SIZE - 1;
> -	u64 start;
> -	u64 end;
> +	u64 cur;
>   	int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
>   	bool found_ordered = false;
>   	bool completed_ordered = false;
> @@ -8387,51 +8442,24 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
>   	if (!inode_evicting)
>   		lock_extent_bits(tree, page_start, page_end, &cached_state);
>   
> -	start = page_start;
> -again:
> -	ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
> -	if (ordered) {
> -		found_ordered = true;
> -		end = min(page_end,
> -			  ordered->file_offset + ordered->num_bytes - 1);
> -		/*
> -		 * IO on this page will never be started, so we need to account
> -		 * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
> -		 * here, must leave that up for the ordered extent completion.
> -		 */
> -		if (!inode_evicting)
> -			clear_extent_bit(tree, start, end,
> -					 EXTENT_DELALLOC |
> -					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
> -					 EXTENT_DEFRAG, 1, 0, &cached_state);
> -		/*
> -		 * whoever cleared the private bit is responsible
> -		 * for the finish_ordered_io
> -		 */
> -		if (TestClearPagePrivate2(page)) {
> -			spin_lock_irq(&inode->ordered_tree.lock);
> -			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
> -			ordered->truncated_len = min(ordered->truncated_len,
> -					start - ordered->file_offset);
> -			spin_unlock_irq(&inode->ordered_tree.lock);
> -
> -			if (btrfs_dec_test_ordered_pending(inode, &ordered,
> -							   start,
> -							   end - start + 1, 1)) {
> -				btrfs_finish_ordered_io(ordered);
> -				completed_ordered = true;
> -			}
> -		}
> -		btrfs_put_ordered_extent(ordered);
> -		if (!inode_evicting) {
> -			cached_state = NULL;
> -			lock_extent_bits(tree, start, end,
> -					 &cached_state);
> -		}
> +	cur = page_start;
> +	/* Iterate through all the ordered extents covering the page */
> +	while (cur < page_end) {
> +		struct btrfs_ordered_extent *ordered;
>   
> -		start = end + 1;
> -		if (start < page_end)
> -			goto again;

> +		ordered = btrfs_lookup_ordered_range(inode, cur,
> +				page_end - cur + 1);


  This part is confusing to me, and I hope you can clarify.
  btrfs_lookup_ordered_range() also does

                node = tree_search(tree, file_offset + len);

  so the second argument to tree_search() ends up being %page_end + 1
  here.

  Wouldn't that end up calling invalidate_ordered_extent() for a range
  beyond %offset + %length?
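
  To make the arithmetic concrete, a tiny userspace sketch with purely
  illustrative numbers (a hypothetical 4K page at file offset 0):

#include <stdio.h>

int main(void)
{
	/* Illustrative values only: a 4K page starting at file offset 0. */
	unsigned long long page_start = 0;
	unsigned long long page_end = page_start + 4096 - 1;	/* 4095 */
	unsigned long long cur = page_start;
	unsigned long long len = page_end - cur + 1;		/* 4096 */

	/* This is the offset tree_search() would see inside the lookup. */
	printf("file_offset + len = %llu (page_end + 1 = %llu)\n",
	       cur + len, page_end + 1);
	return 0;
}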

Thanks, Anand


> +		if (ordered) {
> +			cur = ordered->file_offset + ordered->num_bytes;
> +
> +			found_ordered = true;
> +			completed_ordered = invalidate_ordered_extent(inode,
> +					ordered, page, &cached_state,
> +					inode_evicting);
> +		} else {
> +			/* Exhausted all ordered extents */
> +			break;
> +		}
>   	}
>   
>   	/*
> 

