From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 15/17] btrfs: introduce subpage_eb_mapping for extent buffers
Date: Tue,  8 Sep 2020 15:52:28 +0800
Message-ID: <20200908075230.86856-16-wqu@suse.com>
In-Reply-To: <20200908075230.86856-1-wqu@suse.com>

One of the design blockers for subpage support is the btree inode
page::private mapping.

Currently page::private for the btree inode is a pointer to the extent
buffer that owns the page.
This is fine for the sectorsize == PAGE_SIZE case, but not suitable for
subpage support, as in that case one page can hold multiple tree
blocks.

So to support subpage, this patch introduces a new structure,
subpage_eb_mapping, to record which extent buffers refer to a given
page.

It uses a bitmap (at most 16 bits used) to record which sectors hold
tree blocks, and an array of extent buffer pointers (at most 16
entries) to record their owners.
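
To illustrate (a sketch assuming a 64K page, 4K sectorsize and 16K
nodesize; values chosen only for the example), a tree block starting
32K into its page maps into the structure like this:

	index_start = (eb->start - page_offset(page)) / sectorsize;
						/* 32K / 4K = slot 8 */
	nr_bits = nodesize / sectorsize;	/* 16K / 4K = 4 slots */

	/* Bits 8-11 of mapping->bitmap get set, and buffers[8..11]
	   all point back to the same extent buffer. */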

This patch modifies the following functions to add subpage support
using the subpage_eb_mapping structure:
- attach_extent_buffer_page()
- detach_extent_buffer_page()
- grab_extent_buffer_from_page()
- try_release_extent_buffer()

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 221 ++++++++++++++++++++++++++++++++++++++++---
 fs/btrfs/extent_io.h |   3 +
 2 files changed, 212 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a83b63ecc5f8..87b3bb781532 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -29,6 +29,34 @@ static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 static struct bio_set btrfs_bioset;
 
+/* Upper limit of how many extent buffers can be stored in one page */
+#define SUBPAGE_NR_EXTENT_BUFFERS (SZ_64K / SZ_4K)
+/*
+ * Structure for subpage support, recording the page -> extent buffer mapping
+ *
+ * For subpage support, one 64K page can contain several tree blocks, unlike
+ * the 1:1 page <-> extent buffer mapping of the sectorsize == PAGE_SIZE case.
+ */
+struct subpage_eb_mapping {
+	/*
+	 * Which range has extent buffer.
+	 *
+	 * One bit represents one sector; the bit number represents the
+	 * offset within the page.
+	 * At most 16 bits are utilized.
+	 */
+	unsigned long bitmap;
+
+	/* We only support 64K PAGE_SIZE systems mounting 4K sectorsize filesystems */
+	struct extent_buffer *buffers[SUBPAGE_NR_EXTENT_BUFFERS];
+};
+
+struct btrfs_fs_info *page_to_fs_info(struct page *page)
+{
+	ASSERT(page && page->mapping);
+
+	return BTRFS_I(page->mapping->host)->root->fs_info;
+}
+
 static inline bool extent_state_in_tree(const struct extent_state *state)
 {
 	return !RB_EMPTY_NODE(&state->rb_node);
@@ -3098,12 +3126,50 @@ static int submit_extent_page(unsigned int opf,
 	return ret;
 }
 
+static void attach_subpage_mapping(struct extent_buffer *eb,
+				   struct page *page,
+				   struct subpage_eb_mapping *mapping)
+{
+	u32 sectorsize = eb->fs_info->sectorsize;
+	u32 nodesize = eb->fs_info->nodesize;
+	int index_start = (eb->start - page_offset(page)) / sectorsize;
+	int nr_bits = nodesize / sectorsize;
+	int i;
+
+	ASSERT(mapping);
+	if (!PagePrivate(page)) {
+		/* Attach mapping to page::private and initialize */
+		memset(mapping, 0, sizeof(*mapping));
+		attach_page_private(page, mapping);
+	} else {
+		/* Use the existing page::private as mapping */
+		kfree(mapping);
+		mapping = (struct subpage_eb_mapping *) page->private;
+	}
+
+	/* Set the bitmap and pointers */
+	for (i = index_start; i < index_start + nr_bits; i++) {
+		set_bit(i, &mapping->bitmap);
+		mapping->buffers[i] = eb;
+	}
+}
+
 static void attach_extent_buffer_page(struct extent_buffer *eb,
-				      struct page *page)
+				      struct page *page,
+				      struct subpage_eb_mapping *mapping)
 {
+	bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE);
+
 	if (page->mapping)
 		assert_spin_locked(&page->mapping->private_lock);
 
+	if (subpage && page->mapping) {
+		attach_subpage_mapping(eb, page, mapping);
+		return;
+	}
+	/*
+	 * Anonymous pages and the sectorsize == PAGE_SIZE case use
+	 * page::private directly as a pointer to the eb.
+	 */
 	if (!PagePrivate(page))
 		attach_page_private(page, eb);
 	else
@@ -4928,16 +4994,61 @@ int extent_buffer_under_io(const struct extent_buffer *eb)
 		test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
 }
 
+static void detach_subpage_mapping(struct extent_buffer *eb, struct page *page)
+{
+	struct subpage_eb_mapping *mapping;
+	u32 sectorsize = eb->fs_info->sectorsize;
+	int start_index;
+	int nr_bits = eb->fs_info->nodesize / sectorsize;
+	int i;
+
+	/* Page already detached */
+	if (!PagePrivate(page))
+		return;
+
+	assert_spin_locked(&page->mapping->private_lock);
+	ASSERT(eb->start >= page_offset(page) &&
+	       eb->start < page_offset(page) + PAGE_SIZE);
+
+	mapping = (struct subpage_eb_mapping *)page->private;
+	start_index = (eb->start - page_offset(page)) / sectorsize;
+
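+	/* Clear only the slots that still point back to this eb */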
+	for (i = start_index; i < start_index + nr_bits; i++) {
+		if (test_bit(i, &mapping->bitmap) &&
+		    mapping->buffers[i] == eb) {
+			clear_bit(i, &mapping->bitmap);
+			mapping->buffers[i] = NULL;
+		}
+	}
+
+	/* Are we the last owner? */
+	if (mapping->bitmap == 0) {
+		kfree(mapping);
+		detach_page_private(page);
+		/* Put the extra ref taken when the page was first allocated */
+		put_page(page);
+	}
+}
+
 static void detach_extent_buffer_page(struct extent_buffer *eb,
 				      struct page *page)
 {
 	bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
+	bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE);
 
 	if (!page)
 		return;
 
 	if (mapped)
 		spin_lock(&page->mapping->private_lock);
+
+	if (subpage && page->mapping) {
+		detach_subpage_mapping(eb, page);
+		if (mapped)
+			spin_unlock(&page->mapping->private_lock);
+		return;
+	}
+
 	if (PagePrivate(page) && page->private == (unsigned long)eb) {
 		BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
 		BUG_ON(PageDirty(page));
@@ -5035,7 +5146,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
 			btrfs_release_extent_buffer(new);
 			return NULL;
 		}
-		attach_extent_buffer_page(new, p);
+		attach_extent_buffer_page(new, p, NULL);
 		WARN_ON(PageDirty(p));
 		SetPageUptodate(p);
 		new->pages[i] = p;
@@ -5243,8 +5354,31 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
  * The function here is to ensure we have proper locking and detect such race
 * so we won't allocate an eb twice.
  */
-static struct extent_buffer *grab_extent_buffer_from_page(struct page *page)
+static struct extent_buffer *grab_extent_buffer_from_page(struct page *page,
+							  u64 bytenr)
 {
+	struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+	bool subpage = (fs_info->sectorsize < PAGE_SIZE);
+
+	if (!PagePrivate(page))
+		return NULL;
+
+	if (subpage) {
+		struct subpage_eb_mapping *mapping;
+		u32 sectorsize = fs_info->sectorsize;
+		int start_index;
+
+		ASSERT(bytenr >= page_offset(page) &&
+		       bytenr < page_offset(page) + PAGE_SIZE);
+
+		start_index = (bytenr - page_offset(page)) / sectorsize;
+		mapping = (struct subpage_eb_mapping *)page->private;
+
+		if (test_bit(start_index, &mapping->bitmap))
+			return mapping->buffers[start_index];
+		return NULL;
+	}
+
 	/*
 	 * For PAGE_SIZE == sectorsize case, a btree_inode page should have its
 	 * private pointer set to the extent buffer that owns this page.
@@ -5263,6 +5397,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *exists = NULL;
 	struct page *p;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
+	struct subpage_eb_mapping *subpage_mapping = NULL;
+	bool subpage = (fs_info->sectorsize < PAGE_SIZE);
 	int uptodate = 1;
 	int ret;
 
@@ -5286,6 +5422,14 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	if (!eb)
 		return ERR_PTR(-ENOMEM);
 
+	if (subpage) {
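+		/*
+		 * Preallocated here since the attach happens with
+		 * private_lock held; attach_subpage_mapping() frees this
+		 * if the page already has a mapping attached.
+		 */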
+		subpage_mapping = kmalloc(sizeof(*subpage_mapping), GFP_NOFS);
+		if (!subpage_mapping) {
+			exists = ERR_PTR(-ENOMEM);
+			goto free_eb;
+		}
+	}
+
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
@@ -5296,7 +5440,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 		spin_lock(&mapping->private_lock);
 		if (PagePrivate(p)) {
-			exists = grab_extent_buffer_from_page(p);
+			exists = grab_extent_buffer_from_page(p, start);
 			if (exists && atomic_inc_not_zero(&exists->refs)) {
 				spin_unlock(&mapping->private_lock);
 				unlock_page(p);
@@ -5306,16 +5450,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 			}
 			exists = NULL;
 
-			/*
-			 * Do this so attach doesn't complain and we need to
-			 * drop the ref the old guy had.
-			 */
-			ClearPagePrivate(p);
-			WARN_ON(PageDirty(p));
-			put_page(p);
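+			/*
+			 * For subpage, page::private is the shared
+			 * subpage_eb_mapping and must stay in place for
+			 * the other ebs in this page.
+			 */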
+			if (!subpage) {
+				/*
+				 * Do this so attach doesn't complain and we
+				 * need to drop the ref the old guy had.
+				 */
+				ClearPagePrivate(p);
+				WARN_ON(PageDirty(p));
+				put_page(p);
+			}
 		}
-		attach_extent_buffer_page(eb, p);
+		attach_extent_buffer_page(eb, p, subpage_mapping);
 		spin_unlock(&mapping->private_lock);
+		subpage_mapping = NULL;
 		WARN_ON(PageDirty(p));
 		eb->pages[i] = p;
 		if (!PageUptodate(p))
@@ -5365,6 +5512,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 free_eb:
 	WARN_ON(!atomic_dec_and_test(&eb->refs));
+	kfree(subpage_mapping);
 	for (i = 0; i < num_pages; i++) {
 		if (eb->pages[i])
 			unlock_page(eb->pages[i]);
@@ -6158,8 +6306,49 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
+static int try_release_subpage_ebs(struct page *page)
+{
+	struct subpage_eb_mapping *mapping;
+	int i;
+
+	assert_spin_locked(&page->mapping->private_lock);
+	if (!PagePrivate(page))
+		return 1;
+
+	mapping = (struct subpage_eb_mapping *)page->private;
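+	/*
+	 * The mapping is freed and page private detached once the last eb
+	 * is released, hence the PagePrivate() re-check on each iteration.
+	 */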
+	for (i = 0; i < SUBPAGE_NR_EXTENT_BUFFERS && PagePrivate(page); i++) {
+		struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+		struct extent_buffer *eb;
+		int ret;
+
+		if (!test_bit(i, &mapping->bitmap))
+			continue;
+
+		eb = mapping->buffers[i];
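+		/*
+		 * release_extent_buffer() may take private_lock again when
+		 * detaching the eb from this page, so drop it here to
+		 * avoid a deadlock.
+		 */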
+		spin_unlock(&page->mapping->private_lock);
+		spin_lock(&eb->refs_lock);
+		ret = release_extent_buffer(eb);
+		spin_lock(&page->mapping->private_lock);
+
+		/*
+		 * The extent buffer can't be freed yet; skip the remaining
+		 * slots it covers so we don't call release_extent_buffer()
+		 * on it again.
+		 */
+		if (!ret)
+			i += (fs_info->nodesize / fs_info->sectorsize - 1);
+	}
+	/*
+	 * If detach_subpage_mapping(), called from release_extent_buffer(),
+	 * has detached all ebs from this page, then all related ebs have
+	 * been released.
+	 */
+	if (!PagePrivate(page))
+		return 1;
+	return 0;
+}
+
 int try_release_extent_buffer(struct page *page)
 {
+	bool subpage = (page_to_fs_info(page)->sectorsize < PAGE_SIZE);
 	struct extent_buffer *eb;
 
 	/*
@@ -6172,6 +6361,14 @@ int try_release_extent_buffer(struct page *page)
 		return 1;
 	}
 
+	if (subpage) {
+		int ret;
+
+		ret = try_release_subpage_ebs(page);
+		spin_unlock(&page->mapping->private_lock);
+		return ret;
+	}
+
 	eb = (struct extent_buffer *)page->private;
 	BUG_ON(!eb);
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e16c5449ba48..6593b6883438 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -184,6 +184,9 @@ static inline int extent_compress_type(unsigned long bio_flags)
 	return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
 }
 
+/* Not inlined in the header, as it needs both ASSERT() and BTRFS_I() */
+struct btrfs_fs_info *page_to_fs_info(struct page *page);
+
 struct extent_map_tree;
 
 typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
-- 
2.28.0

