From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 15/17] btrfs: introduce subpage_eb_mapping for extent buffers
Date: Tue, 8 Sep 2020 15:52:28 +0800
Message-ID: <20200908075230.86856-16-wqu@suse.com>
In-Reply-To: <20200908075230.86856-1-wqu@suse.com>
One of the design blockers for subpage support is the btree inode
page::private mapping.

Currently page::private for the btree inode is a pointer to the extent
buffer that owns the page.

This is fine for the sectorsize == PAGE_SIZE case, but not suitable for
subpage support, as in that case one page can hold multiple tree
blocks.

So to support subpage, introduce a new structure, subpage_eb_mapping,
to record which extent buffers are referring to one page.

It uses a bitmap (at most 16 bits used) to record which sectors are
covered by tree blocks, and an extent buffer pointer array (at most 16
entries) to record the owners.
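
As a minimal illustration of the indexing math (assuming a 64K page
with 4K sectorsize and 16K nodesize; the concrete values are only for
this sketch):

	/* A tree block at eb->start = page_offset(page) + 16K ... */
	int first_bit = (eb->start - page_offset(page)) / sectorsize; /* 4 */
	int nr_bits = nodesize / sectorsize;                          /* 4 */
	/* ... sets bits 4-7, and buffers[4..7] all point to this eb. */
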
This patch modifies the following functions to add subpage support
using the subpage_eb_mapping structure (a short sketch of the two
page::private layouts follows the list):
- attach_extent_buffer_page()
- detach_extent_buffer_page()
- grab_extent_buffer_from_page()
- try_release_extent_buffer()
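
For reference, the two interpretations of page::private look roughly
like this (a simplified sketch, not verbatim from the code below):

	/* sectorsize == PAGE_SIZE: page::private is the owning eb */
	struct extent_buffer *eb = (struct extent_buffer *)page->private;

	/* sectorsize < PAGE_SIZE: page::private is the eb mapping */
	struct subpage_eb_mapping *m =
		(struct subpage_eb_mapping *)page->private;
	struct extent_buffer *eb2 = m->buffers[bit_nr]; /* bit_nr as above */
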
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/extent_io.c | 221 ++++++++++++++++++++++++++++++++++++++++---
fs/btrfs/extent_io.h | 3 +
2 files changed, 212 insertions(+), 12 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a83b63ecc5f8..87b3bb781532 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -29,6 +29,34 @@ static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;
+/* Upper limit of how many extent buffers can be stored in one page */
+#define SUBPAGE_NR_EXTENT_BUFFERS (SZ_64K / SZ_4K)
+/*
+ * Structure for subpage support, recording the page -> extent buffer mapping
+ *
+ * For subpage support, one 64K page can contain several tree blocks, unlike
+ * the 1:1 page <-> extent buffer mapping of the sectorsize == PAGE_SIZE case.
+ */
+struct subpage_eb_mapping {
+ /*
+ * Which ranges are covered by extent buffers.
+ *
+ * One bit represents one sector, and the bit number represents the
+ * sector offset within the page. At most 16 bits are utilized.
+ */
+ unsigned long bitmap;
+
+ /* We only support 64K PAGE_SIZE systems mounting a 4K sectorsize fs */
+ struct extent_buffer *buffers[SUBPAGE_NR_EXTENT_BUFFERS];
+};
+
+struct btrfs_fs_info *page_to_fs_info(struct page *page)
+{
+ ASSERT(page && page->mapping);
+
+ return BTRFS_I(page->mapping->host)->root->fs_info;
+}
+
static inline bool extent_state_in_tree(const struct extent_state *state)
{
return !RB_EMPTY_NODE(&state->rb_node);
@@ -3098,12 +3126,50 @@ static int submit_extent_page(unsigned int opf,
return ret;
}
+static void attach_subpage_mapping(struct extent_buffer *eb,
+ struct page *page,
+ struct subpage_eb_mapping *mapping)
+{
+ u32 sectorsize = eb->fs_info->sectorsize;
+ u32 nodesize = eb->fs_info->nodesize;
+ int index_start = (eb->start - page_offset(page)) / sectorsize;
+ int nr_bits = nodesize / sectorsize;
+ int i;
+
+ ASSERT(mapping);
+ if (!PagePrivate(page)) {
+ /* Attach mapping to page::private and initialize */
+ memset(mapping, 0, sizeof(*mapping));
+ attach_page_private(page, mapping);
+ } else {
+ /* Use the existing page::private as mapping */
+ kfree(mapping);
+ mapping = (struct subpage_eb_mapping *) page->private;
+ }
+
+ /* Set the bitmap and pointers */
+ for (i = index_start; i < index_start + nr_bits; i++) {
+ set_bit(i, &mapping->bitmap);
+ mapping->buffers[i] = eb;
+ }
+}
+
static void attach_extent_buffer_page(struct extent_buffer *eb,
- struct page *page)
+ struct page *page,
+ struct subpage_eb_mapping *mapping)
{
+ bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE);
if (page->mapping)
assert_spin_locked(&page->mapping->private_lock);
+ if (subpage && page->mapping) {
+ attach_subpage_mapping(eb, page, mapping);
+ return;
+ }
+ /*
+ * Anonymous pages and the sectorsize == PAGE_SIZE case use
+ * page::private as a pointer to the eb directly.
+ */
if (!PagePrivate(page))
attach_page_private(page, eb);
else
@@ -4928,16 +4994,61 @@ int extent_buffer_under_io(const struct extent_buffer *eb)
test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}
+static void detach_subpage_mapping(struct extent_buffer *eb, struct page *page)
+{
+ struct subpage_eb_mapping *mapping;
+ u32 sectorsize = eb->fs_info->sectorsize;
+ int start_index;
+ int nr_bits = eb->fs_info->nodesize / sectorsize;
+ int i;
+
+ /* Page already detached */
+ if (!PagePrivate(page))
+ return;
+
+ assert_spin_locked(&page->mapping->private_lock);
+ ASSERT(eb->start >= page_offset(page) &&
+ eb->start < page_offset(page) + PAGE_SIZE);
+
+ mapping = (struct subpage_eb_mapping *)page->private;
+ start_index = (eb->start - page_offset(page)) / sectorsize;
+
+ for (i = start_index; i < start_index + nr_bits; i++) {
+ if (test_bit(i, &mapping->bitmap) &&
+ mapping->buffers[i] == eb) {
+ clear_bit(i, &mapping->bitmap);
+ mapping->buffers[i] = NULL;
+ }
+ }
+
+ /* Are we the last owner? */
+ if (mapping->bitmap == 0) {
+ kfree(mapping);
+ detach_page_private(page);
+ /* Drop the ref from when the page was first allocated */
+ put_page(page);
+ }
+}
+
static void detach_extent_buffer_page(struct extent_buffer *eb,
struct page *page)
{
bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
+ bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE);
if (!page)
return;
if (mapped)
spin_lock(&page->mapping->private_lock);
+
+ if (subpage && page->mapping) {
+ detach_subpage_mapping(eb, page);
+ if (mapped)
+ spin_unlock(&page->mapping->private_lock);
+ return;
+ }
+
if (PagePrivate(page) && page->private == (unsigned long)eb) {
BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(PageDirty(page));
@@ -5035,7 +5146,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
btrfs_release_extent_buffer(new);
return NULL;
}
- attach_extent_buffer_page(new, p);
+ attach_extent_buffer_page(new, p, NULL);
WARN_ON(PageDirty(p));
SetPageUptodate(p);
new->pages[i] = p;
@@ -5243,8 +5354,31 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
* The function here is to ensure we have proper locking and detect such race
* so we won't allocate an eb twice.
*/
-static struct extent_buffer *grab_extent_buffer_from_page(struct page *page)
+static struct extent_buffer *grab_extent_buffer_from_page(struct page *page,
+ u64 bytenr)
{
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+ bool subpage = (fs_info->sectorsize < PAGE_SIZE);
+
+ if (!PagePrivate(page))
+ return NULL;
+
+ if (subpage) {
+ struct subpage_eb_mapping *mapping;
+ u32 sectorsize = fs_info->sectorsize;
+ int start_index;
+
+ ASSERT(bytenr >= page_offset(page) &&
+ bytenr < page_offset(page) + PAGE_SIZE);
+
+ start_index = (bytenr - page_offset(page)) / sectorsize;
+ mapping = (struct subpage_eb_mapping *)page->private;
+
+ if (test_bit(start_index, &mapping->bitmap))
+ return mapping->buffers[start_index];
+ return NULL;
+ }
+
/*
* For the PAGE_SIZE == sectorsize case, a btree_inode page should have its
* private pointer as the extent buffer that owns this page.
@@ -5263,6 +5397,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
+ struct subpage_eb_mapping *subpage_mapping = NULL;
+ bool subpage = (fs_info->sectorsize < PAGE_SIZE);
int uptodate = 1;
int ret;
@@ -5286,6 +5422,14 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (!eb)
return ERR_PTR(-ENOMEM);
+ if (subpage) {
+ subpage_mapping = kmalloc(sizeof(*subpage_mapping), GFP_NOFS);
+ if (!subpage_mapping) {
+ exists = ERR_PTR(-ENOMEM);
+ goto free_eb;
+ }
+ }
+
num_pages = num_extent_pages(eb);
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
@@ -5296,7 +5440,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
spin_lock(&mapping->private_lock);
if (PagePrivate(p)) {
- exists = grab_extent_buffer_from_page(p);
+ exists = grab_extent_buffer_from_page(p, start);
if (exists && atomic_inc_not_zero(&exists->refs)) {
spin_unlock(&mapping->private_lock);
unlock_page(p);
@@ -5306,16 +5450,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
}
exists = NULL;
- /*
- * Do this so attach doesn't complain and we need to
- * drop the ref the old guy had.
- */
- ClearPagePrivate(p);
- WARN_ON(PageDirty(p));
- put_page(p);
+ if (!subpage) {
+ /*
+ * Do this so attach doesn't complain and we
+ * need to drop the ref the old guy had.
+ */
+ ClearPagePrivate(p);
+ WARN_ON(PageDirty(p));
+ put_page(p);
+ }
}
- attach_extent_buffer_page(eb, p);
+ attach_extent_buffer_page(eb, p, subpage_mapping);
spin_unlock(&mapping->private_lock);
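+ /*
+ * attach_extent_buffer_page() has either attached subpage_mapping to
+ * the page or freed it, so don't reuse or double free it.
+ */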
+ subpage_mapping = NULL;
WARN_ON(PageDirty(p));
eb->pages[i] = p;
if (!PageUptodate(p))
@@ -5365,6 +5512,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
free_eb:
WARN_ON(!atomic_dec_and_test(&eb->refs));
+ kfree(subpage_mapping);
for (i = 0; i < num_pages; i++) {
if (eb->pages[i])
unlock_page(eb->pages[i]);
@@ -6158,8 +6306,49 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
}
}
+static int try_release_subpage_ebs(struct page *page)
+{
+ struct subpage_eb_mapping *mapping;
+ int i;
+
+ assert_spin_locked(&page->mapping->private_lock);
+ if (!PagePrivate(page))
+ return 1;
+
+ mapping = (struct subpage_eb_mapping *)page->private;
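+ /*
+ * private_lock is dropped and re-taken inside the loop below, so the
+ * last release_extent_buffer() may have freed the mapping already;
+ * that is why PagePrivate() is re-checked on every iteration.
+ */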
+ for (i = 0; i < SUBPAGE_NR_EXTENT_BUFFERS && PagePrivate(page); i++) {
+ struct btrfs_fs_info *fs_info = page_to_fs_info(page);
+ struct extent_buffer *eb;
+ int ret;
+
+ if (!test_bit(i, &mapping->bitmap))
+ continue;
+
+ eb = mapping->buffers[i];
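+ /*
+ * release_extent_buffer() can end up in detach_extent_buffer_page(),
+ * which takes private_lock itself, so drop the lock before calling it.
+ */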
+ spin_unlock(&page->mapping->private_lock);
+ spin_lock(&eb->refs_lock);
+ ret = release_extent_buffer(eb);
+ spin_lock(&page->mapping->private_lock);
+
+ /*
+ * The eb is still in use and can't be freed yet. Skip the remaining
+ * slots covered by this eb so we don't call release_extent_buffer()
+ * on it again.
+ */
+ if (!ret)
+ i += (fs_info->nodesize / fs_info->sectorsize - 1);
+ }
+ /*
+ * If all ebs were released, detach_subpage_mapping() called from
+ * release_extent_buffer() has detached them and freed page::private.
+ */
+ if (!PagePrivate(page))
+ return 1;
+ return 0;
+}
+
int try_release_extent_buffer(struct page *page)
{
+ bool subpage = (page_to_fs_info(page)->sectorsize < PAGE_SIZE);
struct extent_buffer *eb;
/*
@@ -6172,6 +6361,14 @@ int try_release_extent_buffer(struct page *page)
return 1;
}
+ if (subpage) {
+ int ret;
+
+ ret = try_release_subpage_ebs(page);
+ spin_unlock(&page->mapping->private_lock);
+ return ret;
+ }
+
eb = (struct extent_buffer *)page->private;
BUG_ON(!eb);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e16c5449ba48..6593b6883438 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -184,6 +184,9 @@ static inline int extent_compress_type(unsigned long bio_flags)
return bio_flags >> EXTENT_BIO_FLAG_SHIFT;
}
+/* Can't be inlined here, as it would require both ASSERT() and BTRFS_I() */
+struct btrfs_fs_info *page_to_fs_info(struct page *page);
+
struct extent_map_tree;
typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
--
2.28.0