All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Cc: Josef Bacik <josef@toxicpanda.com>
Subject: [PATCH v3 2/4] btrfs: refactor btrfs_buffered_write() into process_one_batch()
Date: Tue, 25 Aug 2020 13:48:06 +0800	[thread overview]
Message-ID: <20200825054808.16241-3-wqu@suse.com> (raw)
In-Reply-To: <20200825054808.16241-1-wqu@suse.com>

Inside btrfs_buffered_write() we had a big chunk of code in the while()
loop.

Refactor this big chunk into process_one_batch(), which will do the main
job.

This refactor doesn't touch the functioniality at all, just make life
easier with more headroom for codes.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/file.c | 406 ++++++++++++++++++++++++++----------------------
 1 file changed, 216 insertions(+), 190 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 64f744989697..f906f0910310 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1402,7 +1402,10 @@ static int prepare_uptodate_page(struct inode *inode,
 }
 
 /*
- * this just gets pages into the page cache and locks them down.
+ * This just gets pages into the page cache and locks them down.
+ *
+ * @force_uptodate: Whether to force the first page uptodate.
+ *		    This happens after a short copy on non-uptodate page.
  */
 static noinline int prepare_pages(struct inode *inode, struct page **pages,
 				  size_t num_pages, loff_t pos,
@@ -1639,226 +1642,192 @@ static int calc_nr_pages(loff_t pos, struct iov_iter *iov)
 	return nr_pages;
 }
 
-static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
-					       struct iov_iter *i)
+/*
+ * The helper to copy one batch of data from iov to pages.
+ *
+ * @pages:	The page pointer array we're going to use.
+ * @nrptrs:	The number of page pointers we can utilize
+ * @pos:	The position of this write, in bytes
+ * @iov:	The source of data.
+ * @force_uptodate: Do we need to force the first page uptodate.
+ * 		    true if previous run we hit a short copy.
+ *
+ * Return >0 as the bytes we copied, with iov advanced.
+ * Return 0 if we hit a short copy, and should force the next run to have the
+ * first page uptodate.
+ * Return <0 for error.
+ */
+static ssize_t process_one_batch(struct inode *inode, struct page **pages,
+			int nrptrs, loff_t pos, struct iov_iter *iov,
+			bool force_uptodate)
 {
-	struct file *file = iocb->ki_filp;
-	loff_t pos = iocb->ki_pos;
-	struct inode *inode = file_inode(file);
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct page **pages = NULL;
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct extent_state *cached_state = NULL;
 	struct extent_changeset *data_reserved = NULL;
+	size_t offset = offset_in_page(pos);
+	size_t sector_offset;
+	size_t write_bytes = min(iov_iter_count(iov),
+				 nrptrs * (size_t)PAGE_SIZE - offset);
+	size_t num_pages = DIV_ROUND_UP(write_bytes + offset, PAGE_SIZE);
+	size_t reserve_bytes;
+	size_t dirty_pages;
+	size_t copied = 0;
+	size_t dirty_sectors;
+	size_t num_sectors;
+	bool only_release_metadata = false;
 	u64 release_bytes = 0;
 	u64 lockstart;
 	u64 lockend;
-	size_t num_written = 0;
-	int nrptrs;
-	int ret = 0;
-	bool only_release_metadata = false;
-	bool force_page_uptodate = false;
+	int extents_locked;
+	int ret;
 
-	nrptrs = calc_nr_pages(pos, i);
-	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
 
-	while (iov_iter_count(i) > 0) {
-		struct extent_state *cached_state = NULL;
-		size_t offset = offset_in_page(pos);
-		size_t sector_offset;
-		size_t write_bytes = min(iov_iter_count(i),
-					 nrptrs * (size_t)PAGE_SIZE -
-					 offset);
-		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
-						PAGE_SIZE);
-		size_t reserve_bytes;
-		size_t dirty_pages;
-		size_t copied;
-		size_t dirty_sectors;
-		size_t num_sectors;
-		int extents_locked;
-
-		WARN_ON(num_pages > nrptrs);
-
-		/*
-		 * Fault pages before locking them in prepare_pages
-		 * to avoid recursive lock
-		 */
-		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
-			ret = -EFAULT;
-			break;
-		}
+	/*
+	 * Fault pages before locking them in prepare_pages to avoid recursive
+	 * lock.
+	 */
+	if (unlikely(iov_iter_fault_in_readable(iov, write_bytes))) {
+		ret = -EFAULT;
+		goto out;
+	}
 
-		only_release_metadata = false;
-		sector_offset = pos & (fs_info->sectorsize - 1);
-		reserve_bytes = round_up(write_bytes + sector_offset,
-				fs_info->sectorsize);
+	sector_offset = pos & (fs_info->sectorsize - 1);
+	reserve_bytes = round_up(write_bytes + sector_offset,
+				 fs_info->sectorsize);
 
-		extent_changeset_release(data_reserved);
-		ret = btrfs_check_data_free_space(BTRFS_I(inode),
-						  &data_reserved, pos,
-						  write_bytes);
-		if (ret < 0) {
-			if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
-						   &write_bytes) > 0) {
-				/*
-				 * For nodata cow case, no need to reserve
-				 * data space.
-				 */
-				only_release_metadata = true;
-				/*
-				 * our prealloc extent may be smaller than
-				 * write_bytes, so scale down.
-				 */
-				num_pages = DIV_ROUND_UP(write_bytes + offset,
-							 PAGE_SIZE);
-				reserve_bytes = round_up(write_bytes +
-							 sector_offset,
-							 fs_info->sectorsize);
-			} else {
-				break;
-			}
+	/* Reserve space for data and metadata */
+	ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, pos,
+					  write_bytes);
+	if (ret < 0) {
+		if (btrfs_check_nocow_lock(BTRFS_I(inode), pos,
+					   &write_bytes) > 0) {
+			/* Nodata cow case, no need to reserve data space. */
+			only_release_metadata = true;
+			/*
+			 * our prealloc extent may be smaller than
+			 * write_bytes, so scale down.
+			 */
+			num_pages = DIV_ROUND_UP(write_bytes + offset,
+						 PAGE_SIZE);
+			reserve_bytes = round_up(write_bytes + sector_offset,
+						 fs_info->sectorsize);
+		} else {
+			goto out;
 		}
+	}
 
-		WARN_ON(reserve_bytes == 0);
-		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-				reserve_bytes);
-		if (ret) {
-			if (!only_release_metadata)
-				btrfs_free_reserved_data_space(BTRFS_I(inode),
-						data_reserved, pos,
-						write_bytes);
-			else
-				btrfs_check_nocow_unlock(BTRFS_I(inode));
-			break;
-		}
+	WARN_ON(reserve_bytes == 0);
+	ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserve_bytes);
+	if (ret) {
+		if (!only_release_metadata)
+			btrfs_free_reserved_data_space(BTRFS_I(inode),
+					data_reserved, pos, write_bytes);
+		else
+			btrfs_check_nocow_unlock(BTRFS_I(inode));
+		goto out;
+	}
 
-		release_bytes = reserve_bytes;
-again:
-		/*
-		 * This is going to setup the pages array with the number of
-		 * pages we want, so we don't really need to worry about the
-		 * contents of pages from loop to loop
-		 */
-		ret = prepare_pages(inode, pages, num_pages,
-				    pos, write_bytes,
-				    force_page_uptodate);
-		if (ret) {
-			btrfs_delalloc_release_extents(BTRFS_I(inode),
-						       reserve_bytes);
-			break;
-		}
+	release_bytes = reserve_bytes;
 
-		extents_locked = lock_and_cleanup_extent_if_need(
-				BTRFS_I(inode), pages,
-				num_pages, pos, write_bytes, &lockstart,
-				&lockend, &cached_state);
-		if (extents_locked < 0) {
-			if (extents_locked == -EAGAIN)
-				goto again;
-			btrfs_delalloc_release_extents(BTRFS_I(inode),
-						       reserve_bytes);
-			ret = extents_locked;
-			break;
-		}
+	/* Lock the pages and extent range */
+again:
+	/*
+	 * This is going to setup the pages array with the number of pages we
+	 * want, so we don't really need to worry about the contents of pages
+	 * from loop to loop.
+	 */
+	ret = prepare_pages(inode, pages, num_pages, pos, write_bytes,
+			    force_uptodate);
+	if (ret) {
+		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+		goto out;
+	}
 
-		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
+	extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
+			num_pages, pos, write_bytes, &lockstart,
+			&lockend, &cached_state);
+	if (extents_locked < 0) {
+		if (extents_locked == -EAGAIN)
+			goto again;
+		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+		ret = extents_locked;
+		goto out;
+	}
 
-		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
-		dirty_sectors = round_up(copied + sector_offset,
-					fs_info->sectorsize);
-		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
+	/* Copy the data from iov to pages */
+	copied = btrfs_copy_from_user(pos, write_bytes, pages, iov);
 
-		/*
-		 * if we have trouble faulting in the pages, fall
-		 * back to one page at a time
-		 */
-		if (copied < write_bytes)
-			nrptrs = 1;
+	num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
+	dirty_sectors = round_up(copied + sector_offset, fs_info->sectorsize);
+	dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);
 
-		if (copied == 0) {
-			force_page_uptodate = true;
-			dirty_sectors = 0;
-			dirty_pages = 0;
-		} else {
-			force_page_uptodate = false;
-			dirty_pages = DIV_ROUND_UP(copied + offset,
-						   PAGE_SIZE);
-		}
-
-		if (num_sectors > dirty_sectors) {
-			/* release everything except the sectors we dirtied */
-			release_bytes -= dirty_sectors <<
-						fs_info->sb->s_blocksize_bits;
-			if (only_release_metadata) {
-				btrfs_delalloc_release_metadata(BTRFS_I(inode),
-							release_bytes, true);
-			} else {
-				u64 __pos;
+	if (copied == 0) {
+		dirty_sectors = 0;
+		dirty_pages = 0;
+	} else {
+		dirty_pages = DIV_ROUND_UP(copied + offset, PAGE_SIZE);
+	}
 
-				__pos = round_down(pos,
-						   fs_info->sectorsize) +
-					(dirty_pages << PAGE_SHIFT);
-				btrfs_delalloc_release_space(BTRFS_I(inode),
-						data_reserved, __pos,
+	if (num_sectors > dirty_sectors) {
+		/* release everything except the sectors we dirtied */
+		release_bytes -= dirty_sectors << fs_info->sb->s_blocksize_bits;
+		if (only_release_metadata) {
+			btrfs_delalloc_release_metadata(BTRFS_I(inode),
 						release_bytes, true);
-			}
-		}
-
-		release_bytes = round_up(copied + sector_offset,
-					fs_info->sectorsize);
-
-		if (copied > 0)
-			ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
-						dirty_pages, pos, copied,
-						&cached_state);
-
-		/*
-		 * If we have not locked the extent range, because the range's
-		 * start offset is >= i_size, we might still have a non-NULL
-		 * cached extent state, acquired while marking the extent range
-		 * as delalloc through btrfs_dirty_pages(). Therefore free any
-		 * possible cached extent state to avoid a memory leak.
-		 */
-		if (extents_locked)
-			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-					     lockstart, lockend, &cached_state);
-		else
-			free_extent_state(cached_state);
+		} else {
+			u64 __pos;
 
-		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
-		if (ret) {
-			btrfs_drop_pages(pages, num_pages);
-			break;
+			__pos = round_down(pos, fs_info->sectorsize) +
+				(dirty_pages << PAGE_SHIFT);
+			btrfs_delalloc_release_space(BTRFS_I(inode),
+					data_reserved, __pos,
+					release_bytes, true);
 		}
+	}
 
-		release_bytes = 0;
-		if (only_release_metadata)
-			btrfs_check_nocow_unlock(BTRFS_I(inode));
+	release_bytes = round_up(copied + sector_offset, fs_info->sectorsize);
 
-		if (only_release_metadata && copied > 0) {
-			lockstart = round_down(pos,
-					       fs_info->sectorsize);
-			lockend = round_up(pos + copied,
-					   fs_info->sectorsize) - 1;
+	if (copied > 0)
+		ret = btrfs_dirty_pages(BTRFS_I(inode), pages, dirty_pages, pos,
+				copied, &cached_state);
 
-			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-				       lockend, EXTENT_NORESERVE, NULL,
-				       NULL, GFP_NOFS);
-		}
+	/*
+	 * If we have not locked the extent range, because the range's
+	 * start offset is >= i_size, we might still have a non-NULL
+	 * cached extent state, acquired while marking the extent range
+	 * as delalloc through btrfs_dirty_pages(). Therefore free any
+	 * possible cached extent state to avoid a memory leak.
+	 */
+	if (extents_locked)
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+				     lockstart, lockend, &cached_state);
+	else
+		free_extent_state(cached_state);
 
+	btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
+	if (ret) {
 		btrfs_drop_pages(pages, num_pages);
+		goto out;
+	}
 
-		cond_resched();
+	release_bytes = 0;
+	if (only_release_metadata)
+		btrfs_check_nocow_unlock(BTRFS_I(inode));
 
-		balance_dirty_pages_ratelimited(inode->i_mapping);
+	if (only_release_metadata && copied > 0) {
+		lockstart = round_down(pos, fs_info->sectorsize);
+		lockend = round_up(pos + copied, fs_info->sectorsize) - 1;
 
-		pos += copied;
-		num_written += copied;
+		set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				EXTENT_NORESERVE, NULL, NULL, GFP_NOFS);
 	}
 
-	kfree(pages);
+	btrfs_drop_pages(pages, num_pages);
+
+	cond_resched();
 
+	balance_dirty_pages_ratelimited(inode->i_mapping);
+out:
 	if (release_bytes) {
 		if (only_release_metadata) {
 			btrfs_check_nocow_unlock(BTRFS_I(inode));
@@ -1872,7 +1841,64 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 		}
 	}
 
-	extent_changeset_free(data_reserved);
+	if (!copied && ret < 0)
+		return ret;
+	return copied;
+}
+
+static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
+					       struct iov_iter *i)
+{
+	struct file *file = iocb->ki_filp;
+	loff_t pos = iocb->ki_pos;
+	struct inode *inode = file_inode(file);
+	struct page **pages = NULL;
+	size_t num_written = 0;
+	int nrptrs;
+	int orig_nrptrs;
+	int ret = 0;
+	bool force_page_uptodate = false;
+
+	orig_nrptrs = calc_nr_pages(pos, i);
+	nrptrs = orig_nrptrs;
+	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	while (iov_iter_count(i) > 0) {
+		ssize_t copied;
+
+		copied = process_one_batch(inode, pages, nrptrs, pos, i,
+					   force_page_uptodate);
+		if (copied < 0) {
+			ret = copied;
+			break;
+		}
+
+		if (copied == 0) {
+			/*
+			 * We had a short copy on even the first page, need to
+			 * force the next page uptodate and fall back to page
+			 * by page pace.
+			 */
+			nrptrs = 1;
+			force_page_uptodate = true;
+		} else {
+			/*
+			 * Copy finished without problem. No longer need to
+			 * force next page uptodate, and revert to regular
+			 * multi-page pace.
+			 */
+			nrptrs = orig_nrptrs;
+			force_page_uptodate = false;
+		}
+
+		pos += copied;
+		num_written += copied;
+	}
+
+	kfree(pages);
+
 	return num_written ? num_written : ret;
 }
 
-- 
2.28.0


  parent reply	other threads:[~2020-08-25  5:48 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-25  5:48 [PATCH v3 0/4] btrfs: basic refactor of btrfs_buffered_write() Qu Wenruo
2020-08-25  5:48 ` [PATCH v3 1/4] btrfs: refactor @nrptrs calculation " Qu Wenruo
2020-08-25  5:48 ` Qu Wenruo [this message]
2020-08-25  5:48 ` [PATCH v3 3/4] btrfs: remove the again: tag in process_one_batch() Qu Wenruo
2020-08-25  5:48 ` [PATCH v3 4/4] btrfs: avoid allocating unnecessary page pointers Qu Wenruo
2020-08-25  7:46   ` kernel test robot
2020-08-25  7:46     ` kernel test robot
2020-08-25  7:57   ` kernel test robot
2020-08-25  7:57     ` kernel test robot
2020-08-26 12:31   ` kernel test robot
2020-08-26 12:31     ` kernel test robot
2020-08-27  8:56   ` [btrfs] a73ab37eba: last_state.is_incomplete_run kernel test robot
2020-08-27  8:56     ` kernel test robot
2020-08-27  8:56     ` [LTP] " kernel test robot
2020-08-25 11:44 ` [PATCH v3 0/4] btrfs: basic refactor of btrfs_buffered_write() Christoph Hellwig
2020-08-25 13:32   ` Goldwyn Rodrigues

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200825054808.16241-3-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.