linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v2 17/19] btrfs: implement subpage metadata read and its endio function
Date: Tue, 15 Sep 2020 13:35:30 +0800	[thread overview]
Message-ID: <20200915053532.63279-18-wqu@suse.com> (raw)
In-Reply-To: <20200915053532.63279-1-wqu@suse.com>

For subpage metadata read, since we're completely relying on io tree
other than page bits, its read submission and endio function is
different from the original page size.

For submission part:
- Do extent locking/waiting
  Instead of page locking/waiting, since we don't rely on page bits anymore.

- Submit extent page directly
  To simply the process, as all the metadata read is always contained in
  one page.

For endio part:
- Do extent locking/waiting

This behavior has a small problem that, extent locking/waiting are all
going to allocate memory, thus they can all fail.

Currently we're using GFP_ATOMIC, but still we may hit ENOSPC someday.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c   |  89 ++++++++++++++++++++++++++++++++++++++
 fs/btrfs/extent_io.c | 101 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2ef35eb7a6e1..1de5a0fef2f5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -645,6 +645,92 @@ static int btrfs_check_extent_buffer(struct extent_buffer *eb)
 	return ret;
 }
 
+static int btree_read_subpage_endio_hook(struct page *page, u64 start, u64 end,
+					 int mirror)
+{
+	struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+	struct extent_io_tree *io_tree = info_to_btree_io_tree(fs_info);
+	struct extent_buffer *eb;
+	int reads_done;
+	int ret = 0;
+
+	if (!IS_ALIGNED(start, fs_info->sectorsize) ||
+	    !IS_ALIGNED(end - start + 1, fs_info->sectorsize) ||
+	    !IS_ALIGNED(end - start + 1, fs_info->nodesize)) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		btrfs_err(fs_info, "invalid tree read bytenr");
+		return -EUCLEAN;
+	}
+
+	/*
+	 * We don't allow bio merge for subpage metadata read, so we should
+	 * only get one eb for each endio hook.
+	 */
+	ASSERT(end == start + fs_info->nodesize - 1);
+	ASSERT(PagePrivate(page));
+
+	rcu_read_lock();
+	eb = radix_tree_lookup(&fs_info->buffer_radix,
+			       start / fs_info->sectorsize);
+	rcu_read_unlock();
+
+	/*
+	 * When we are reading one tree block, eb must have been
+	 * inserted into the radix tree. If not something is wrong.
+	 */
+	if (!eb) {
+		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+		btrfs_err(fs_info,
+			"can't find extent buffer for bytenr %llu",
+			start);
+		return -EUCLEAN;
+	}
+	/*
+	 * The pending IO might have been the only thing that kept
+	 * this buffer in memory.  Make sure we have a ref for all
+	 * this other checks
+	 */
+	atomic_inc(&eb->refs);
+
+	reads_done = atomic_dec_and_test(&eb->io_pages);
+	/* Subpage read must finish in page read */
+	ASSERT(reads_done);
+
+	eb->read_mirror= mirror;
+	if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
+		ret = -EIO;
+		goto err;
+	}
+	ret = btrfs_check_extent_buffer(eb);
+	if (ret < 0)
+		goto err;
+
+	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
+		btree_readahead_hook(eb, ret);
+
+	set_extent_buffer_uptodate(eb);
+
+	free_extent_buffer(eb);
+
+	/*
+	 * Since we don't use PageLocked() but extent_io_tree lock to lock the
+	 * range, we need to unlock the range here.
+	 */
+	unlock_extent(io_tree, start, end);
+	return ret;
+err:
+	/*
+	 * our io error hook is going to dec the io pages
+	 * again, we have to make sure it has something to
+	 * decrement
+	 */
+	atomic_inc(&eb->io_pages);
+	clear_extent_buffer_uptodate(eb);
+	free_extent_buffer(eb);
+	unlock_extent(io_tree, start, end);
+	return ret;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 				      u64 phy_offset, struct page *page,
 				      u64 start, u64 end, int mirror)
@@ -653,6 +739,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	int ret = 0;
 	bool reads_done;
 
+	if (page_to_fs_info(page)->sectorsize < PAGE_SIZE)
+		return btree_read_subpage_endio_hook(page, start, end, mirror);
+
 	/* Metadata pages that goes through IO should all have private set */
 	ASSERT(PagePrivate(page) && page->private);
 	eb = (struct extent_buffer *)page->private;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2af6786e6ab4..75437a55a986 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2923,7 +2923,13 @@ static void end_bio_extent_readpage(struct bio *bio)
 			ClearPageUptodate(page);
 			SetPageError(page);
 		}
-		unlock_page(page);
+
+		/*
+		 * For subpage metadata read, we don't have page locked, but
+		 * rely on EXTENT_LOCKED bit to handle lock completely.
+		 */
+		if (!(fs_info->sectorsize < PAGE_SIZE && !data_inode))
+			unlock_page(page);
 		offset += len;
 
 		if (unlikely(!uptodate)) {
@@ -3064,6 +3070,14 @@ static int submit_extent_page(unsigned int opf,
 		else
 			contig = bio_end_sector(bio) == sector;
 
+		/*
+		 * For subpage metadata read, never merge request, so that
+		 * we get endio hook called on each metadata read.
+		 */
+		if (page_to_fs_info(page)->sectorsize < PAGE_SIZE &&
+		    tree->owner == IO_TREE_BTREE_IO)
+			ASSERT(force_bio_submit);
+
 		ASSERT(tree->ops);
 		if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
 			can_merge = false;
@@ -5593,6 +5607,15 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 	int num_pages;
 
 	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+	/* For subpage eb, we don't set page bits, but extent_io_tree bits */
+	if (eb->fs_info->sectorsize < PAGE_SIZE) {
+		struct extent_io_tree *io_tree =
+			info_to_btree_io_tree(eb->fs_info);
+
+		clear_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
+				  EXTENT_UPTODATE);
+		return;
+	}
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
@@ -5608,6 +5631,20 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	int num_pages;
 
 	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+
+	/*
+	 * For subpage size, we ignore page bits, but need to set io tree range
+	 * uptodate
+	 */
+	if (eb->fs_info->sectorsize < PAGE_SIZE) {
+		struct extent_io_tree *io_tree =
+			info_to_btree_io_tree(eb->fs_info);
+
+		set_extent_uptodate(io_tree, eb->start, eb->start + eb->len - 1,
+				    NULL, GFP_ATOMIC);
+		return;
+	}
+
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
@@ -5615,6 +5652,65 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 	}
 }
 
+static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
+				      int mirror_num)
+{
+	struct btrfs_fs_info *fs_info = eb->fs_info;
+	struct extent_io_tree *io_tree = info_to_btree_io_tree(fs_info);
+	struct page *page = eb->pages[0];
+	struct bio *bio = NULL;
+	int ret = 0;
+
+	/* We don't lock page, but only extent_io_tree for btree inode*/
+	if (wait == WAIT_NONE) {
+		ret = try_lock_extent(io_tree, eb->start,
+				      eb->start + eb->len - 1);
+		if (ret == 0)
+			return ret;
+	} else {
+		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = 0;
+	atomic_set(&eb->io_pages, 1);
+	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
+	    test_range_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_UPTODATE, 1, NULL)) {
+		unlock_extent(io_tree, eb->start, eb->start + eb->len - 1);
+		return ret;
+	}
+
+	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, page, eb->start,
+				 eb->len, eb->start - page_offset(page), &bio,
+				 end_bio_extent_readpage, mirror_num, 0, 0,
+				 true);
+	if (ret) {
+		/*
+		 * In the endio function, if we hit something wrong we will
+		 * increase the io_pages, so here we need to decrease it for error
+		 * path.
+		 */
+		atomic_dec(&eb->io_pages);
+	}
+	if (bio) {
+		int tmp;
+
+		tmp = submit_one_bio(bio, mirror_num, 0);
+		if (tmp < 0)
+			return tmp;
+	}
+	if (ret || wait != WAIT_COMPLETE)
+		return ret;
+
+	wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			EXTENT_LOCKED);
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
+		ret = -EIO;
+	return ret;
+}
+
 int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 {
 	int i;
@@ -5631,6 +5727,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
 
+	if (eb->fs_info->sectorsize < PAGE_SIZE)
+		return read_extent_buffer_subpage(eb, wait, mirror_num);
+
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		page = eb->pages[i];
-- 
2.28.0


  parent reply	other threads:[~2020-09-15  5:36 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-15  5:35 [PATCH v2 00/19] btrfs: add read-only support for subpage sector size Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 01/19] btrfs: extent-io-tests: remove invalid tests Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 02/19] btrfs: remove the unnecessary parameter @start and @len for check_data_csum() Qu Wenruo
2020-09-15  8:39   ` Johannes Thumshirn
2020-09-15  5:35 ` [PATCH v2 03/19] btrfs: calculate inline extent buffer page size based on page size Qu Wenruo
2020-09-15  8:35   ` Nikolay Borisov
2020-09-15 10:05     ` Qu Wenruo
2020-09-15  8:40   ` Johannes Thumshirn
2020-09-15  5:35 ` [PATCH v2 04/19] btrfs: remove the open-code to read disk-key Qu Wenruo
2020-09-15  8:36   ` Nikolay Borisov
2020-09-15  8:40   ` Johannes Thumshirn
2020-09-16 16:01   ` David Sterba
2020-09-17  8:02     ` Qu Wenruo
2020-09-17 12:37       ` David Sterba
2020-09-17 13:15         ` Qu Wenruo
2020-09-17 22:41           ` David Sterba
2020-09-17 23:26             ` Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 05/19] btrfs: make btrfs_fs_info::buffer_radix to take sector size devided values Qu Wenruo
2020-09-15  8:27   ` Johannes Thumshirn
2020-09-15 10:04     ` Qu Wenruo
2020-09-15 10:12       ` Johannes Thumshirn
2020-09-15 17:40   ` kernel test robot
2020-09-15  5:35 ` [PATCH v2 06/19] btrfs: don't allow tree block to cross page boundary for subpage support Qu Wenruo
2020-09-15  8:37   ` Nikolay Borisov
2020-09-15 10:06     ` Qu Wenruo
2020-09-15  8:44   ` Johannes Thumshirn
2020-09-15  5:35 ` [PATCH v2 07/19] btrfs: update num_extent_pages() to support subpage sized extent buffer Qu Wenruo
2020-09-15  8:42   ` Johannes Thumshirn
2020-09-15 10:07     ` Qu Wenruo
2020-09-15 10:12       ` Johannes Thumshirn
2020-09-15 10:07     ` Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 08/19] btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 09/19] btrfs: make csum_tree_block() handle sectorsize smaller than page size Qu Wenruo
2020-09-15  8:47   ` Johannes Thumshirn
2020-09-15  5:35 ` [PATCH v2 10/19] btrfs: add assert_spin_locked() for attach_extent_buffer_page() Qu Wenruo
2020-09-15  8:52   ` Johannes Thumshirn
2020-09-15  5:35 ` [PATCH v2 11/19] btrfs: extract the extent buffer verification from btree_readpage_end_io_hook() Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 12/19] btrfs: extent_io: only require sector size alignment for page read Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 13/19] btrfs: make btrfs_readpage_end_io_hook() follow sector size Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 14/19] btrfs: make btree inode io_tree has its special owner Qu Wenruo
2020-09-16  9:28   ` Johannes Thumshirn
2020-09-16 16:06   ` David Sterba
2020-09-17  0:02     ` Qu Wenruo
2020-09-17 12:50       ` David Sterba
2020-09-18  8:18         ` Qu Wenruo
2020-09-22 14:06           ` David Sterba
2020-09-22 14:14   ` David Sterba
2020-09-15  5:35 ` [PATCH v2 15/19] btrfs: don't set extent_io_tree bits for btree inode at endio time Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 16/19] btrfs: use extent_io_tree to handle subpage extent buffer allocation Qu Wenruo
2020-09-15  5:35 ` Qu Wenruo [this message]
2020-09-16  8:47   ` [PATCH v2 17/19] btrfs: implement subpage metadata read and its endio function kernel test robot
2020-09-15  5:35 ` [PATCH v2 18/19] btrfs: implement btree_readpage() and try_release_extent_buffer() for subpage Qu Wenruo
2020-09-15  5:35 ` [PATCH v2 19/19] btrfs: allow RO mount of 4K sector size fs on 64K page system Qu Wenruo
2020-09-16  1:35 ` [PATCH v2 00/19] btrfs: add read-only support for subpage sector size Qu Wenruo
2020-09-16 16:18 ` Neal Gompa
2020-09-17  0:03   ` Qu Wenruo
2020-09-17  0:13     ` Neal Gompa
2020-09-17  0:24       ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200915053532.63279-18-wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).