All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
@ 2021-08-16  6:00 Qu Wenruo
  2021-08-16  6:00 ` [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info Qu Wenruo
  2021-08-16  6:00 ` [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
  0 siblings, 2 replies; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16  6:00 UTC (permalink / raw)
  To: linux-btrfs

Currently we use u16 bitmap to make 4k sectorsize work for 64K page
size.

But this u16 bitmap is not large enough to contain larger page size like
128K, nor is it space-efficient for 16K page size.

To handle both cases, here we pack all subpage bitmaps into a larger
bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
subpage usage.

This is the first step towards more page size support.

Although to really enable extra page size like 16K and 128K, we need to
rework the metadata alignment check first.
Which will happen in another patchset.


Qu Wenruo (2):
  btrfs: introduce btrfs_subpage_bitmap_info
  btrfs: subpage: pack all subpage bitmaps into a larger bitmap

 fs/btrfs/ctree.h     |   1 +
 fs/btrfs/disk-io.c   |  12 ++-
 fs/btrfs/extent_io.c |  58 +++++++++------
 fs/btrfs/subpage.c   | 172 ++++++++++++++++++++++++++++++++-----------
 fs/btrfs/subpage.h   |  41 +++++++----
 5 files changed, 201 insertions(+), 83 deletions(-)

-- 
2.32.0


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info
  2021-08-16  6:00 [PATCH 0/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
@ 2021-08-16  6:00 ` Qu Wenruo
  2021-08-16  9:28   ` Nikolay Borisov
  2021-08-16  6:00 ` [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
  1 sibling, 1 reply; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16  6:00 UTC (permalink / raw)
  To: linux-btrfs

Currently we use fixed size u16 bitmap for subpage bitmap.
This is fine for 4K sectorsize with 64K page size.

But for 4K sectorsize and larger page size, the bitmap is too small,
while for smaller page size like 16K, u16 bitmaps waste too much space.

Here we introduce a new helper structure, btrfs_subpage_bitmap_info, to
record the proper bitmap size, and where each bitmap should start at.

By this, we can later compact all subpage bitmaps into one u32 bitmap.

This patch is the first step towards such compact bitmap.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/ctree.h   |  1 +
 fs/btrfs/disk-io.c | 12 ++++++++++--
 fs/btrfs/subpage.c | 35 +++++++++++++++++++++++++++++++++++
 fs/btrfs/subpage.h | 28 ++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4a69aa604ac5..a98fd6a24113 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -898,6 +898,7 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *scrub_workers;
 	struct btrfs_workqueue *scrub_wr_completion_workers;
 	struct btrfs_workqueue *scrub_parity_workers;
+	struct btrfs_subpage_info *subpage_info;
 
 	struct btrfs_discard_ctl discard_ctl;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1510a9d92858..f35b875f9e53 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
 	btrfs_extent_buffer_leak_debug_check(fs_info);
 	kfree(fs_info->super_copy);
 	kfree(fs_info->super_for_commit);
+	kfree(fs_info->subpage_info);
 	kvfree(fs_info);
 }
 
@@ -3393,11 +3394,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	}
 
 	if (sectorsize != PAGE_SIZE) {
+		struct btrfs_subpage_info *subpage_info;
+
+		ASSERT(sectorsize < PAGE_SIZE);
+
 		btrfs_warn(fs_info,
 	"read-write for sector size %u with page size %lu is experimental",
 			   sectorsize, PAGE_SIZE);
-	}
-	if (sectorsize != PAGE_SIZE) {
 		if (btrfs_super_incompat_flags(fs_info->super_copy) &
 			BTRFS_FEATURE_INCOMPAT_RAID56) {
 			btrfs_err(fs_info,
@@ -3406,6 +3409,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 			err = -EINVAL;
 			goto fail_alloc;
 		}
+		subpage_info = kzalloc(sizeof(*subpage_info), GFP_NOFS);
+		if (!subpage_info)
+			goto fail_alloc;
+		btrfs_init_subpage_info(subpage_info, sectorsize);
+		fs_info->subpage_info = subpage_info;
 	}
 
 	ret = btrfs_init_workqueues(fs_info, fs_devices);
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index a61aa33aeeee..014256d47beb 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -63,6 +63,41 @@
  *   This means a slightly higher tree locking latency.
  */
 
+void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info,
+			     u32 sectorsize)
+{
+	unsigned int cur = 0;
+	unsigned int nr_bits;
+
+	/*
+	 * Just in case we have super large PAGE_SIZE that unsigned int is not
+	 * enough to contain the number of sectors for the minimal sectorsize.
+	 */
+	BUILD_BUG_ON(UINT_MAX * SZ_4K < PAGE_SIZE);
+
+	ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));
+
+	nr_bits = PAGE_SIZE / sectorsize;
+	subpage_info->bitmap_nr_bits = nr_bits;
+
+	subpage_info->uptodate_start = cur;
+	cur += nr_bits;
+
+	subpage_info->error_start = cur;
+	cur += nr_bits;
+
+	subpage_info->dirty_start = cur;
+	cur += nr_bits;
+
+	subpage_info->writeback_start = cur;
+	cur += nr_bits;
+
+	subpage_info->ordered_start = cur;
+	cur += nr_bits;
+
+	subpage_info->total_nr_bits = cur;
+}
+
 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
 			 struct page *page, enum btrfs_subpage_type type)
 {
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 9aa40d795ba9..ea90ba42c97b 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -11,6 +11,32 @@
  */
 #define BTRFS_SUBPAGE_BITMAP_SIZE	16
 
+/*
+ * Extra info for subpapge bitmap.
+ *
+ * For subpage we integrate all uptodate/error/dirty/writeback/ordered
+ * bitmaps into one larger bitmap.
+ * This structure records the basic info.
+ */
+struct btrfs_subpage_info {
+	/* Number of bits for each bitmap*/
+	unsigned int bitmap_nr_bits;
+
+	/* Total number of bits for the whole bitmap */
+	unsigned int total_nr_bits;
+
+	/*
+	 * *_start indicates where the bitmap starts, the length
+	 * is always @bitmap_size, which is calculated from
+	 * PAGE_SIZE / sectorsize.
+	 */
+	unsigned int uptodate_start;
+	unsigned int error_start;
+	unsigned int dirty_start;
+	unsigned int writeback_start;
+	unsigned int ordered_start;
+};
+
 /*
  * Structure to trace status of each sector inside a page, attached to
  * page::private for both data and metadata inodes.
@@ -53,6 +79,8 @@ enum btrfs_subpage_type {
 	BTRFS_SUBPAGE_DATA,
 };
 
+void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info,
+			     u32 sectorsize);
 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
 			 struct page *page, enum btrfs_subpage_type type);
 void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
  2021-08-16  6:00 [PATCH 0/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
  2021-08-16  6:00 ` [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info Qu Wenruo
@ 2021-08-16  6:00 ` Qu Wenruo
  2021-08-16 10:26   ` Nikolay Borisov
  1 sibling, 1 reply; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16  6:00 UTC (permalink / raw)
  To: linux-btrfs

Currently we use u16 bitmap to make 4k sectorsize work for 64K page
size.

But this u16 bitmap is not large enough to contain larger page size like
128K, nor is it space-efficient for 16K page size.

To handle both cases, here we pack all subpage bitmaps into a larger
bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
subpage usage.

Each sub-bitmap will have its start bit number recorded in
btrfs_subpage_info::*_start, and its bitmap length will be recorded in
btrfs_subpage_info::bitmap_nr_bits.

All subpage bitmap operations will be converted from using direct u16
operations to bitmap operations, with above *_start calculated.

For 64K page size with 4K sectorsize, this should not cause much difference.
While for 16K page size, we will only need 1 unsigned long (u32) to
store the bitmap.
And will be able to support 128K page size in the future.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c |  58 ++++++++++--------
 fs/btrfs/subpage.c   | 137 +++++++++++++++++++++++++++++--------------
 fs/btrfs/subpage.h   |  19 +-----
 3 files changed, 130 insertions(+), 84 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 543f87ea372e..e428d6208bb7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3865,10 +3865,9 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 	u64 orig_start = *start;
 	/* Declare as unsigned long so we can use bitmap ops */
-	unsigned long dirty_bitmap;
 	unsigned long flags;
-	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
-	int range_start_bit = nbits;
+	int nbits = offset_in_page(orig_start) >> fs_info->sectorsize_bits;
+	int range_start_bit = nbits + fs_info->subpage_info->dirty_start;
 	int range_end_bit;
 
 	/*
@@ -3883,11 +3882,14 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 
 	/* We should have the page locked, but just in case */
 	spin_lock_irqsave(&subpage->lock, flags);
-	dirty_bitmap = subpage->dirty_bitmap;
+	bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
+			       fs_info->subpage_info->dirty_start +
+			       fs_info->subpage_info->bitmap_nr_bits);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 
-	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
-			       BTRFS_SUBPAGE_BITMAP_SIZE);
+	range_start_bit -= fs_info->subpage_info->dirty_start;
+	range_end_bit -= fs_info->subpage_info->dirty_start;
+
 	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
 	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
 }
@@ -4613,7 +4615,7 @@ static int submit_eb_subpage(struct page *page,
 	int submitted = 0;
 	u64 page_start = page_offset(page);
 	int bit_start = 0;
-	const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
+	const int nbits = fs_info->subpage_info->bitmap_nr_bits;
 	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
 	int ret;
 
@@ -4634,7 +4636,8 @@ static int submit_eb_subpage(struct page *page,
 			break;
 		}
 		spin_lock_irqsave(&subpage->lock, flags);
-		if (!((1 << bit_start) & subpage->dirty_bitmap)) {
+		if (!test_bit(bit_start + fs_info->subpage_info->dirty_start,
+			      subpage->bitmaps)) {
 			spin_unlock_irqrestore(&subpage->lock, flags);
 			spin_unlock(&page->mapping->private_lock);
 			bit_start++;
@@ -7178,32 +7181,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
+#define GANG_LOOKUP_SIZE	16
 static struct extent_buffer *get_next_extent_buffer(
 		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
 {
-	struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
+	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
 	struct extent_buffer *found = NULL;
 	u64 page_start = page_offset(page);
-	int ret;
-	int i;
+	u64 cur = page_start;
 
 	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
-	ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
 	lockdep_assert_held(&fs_info->buffer_lock);
 
-	ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
-			bytenr >> fs_info->sectorsize_bits,
-			PAGE_SIZE / fs_info->nodesize);
-	for (i = 0; i < ret; i++) {
-		/* Already beyond page end */
-		if (gang[i]->start >= page_start + PAGE_SIZE)
-			break;
-		/* Found one */
-		if (gang[i]->start >= bytenr) {
-			found = gang[i];
-			break;
+	while (cur < page_start + PAGE_SIZE) {
+		int ret;
+		int i;
+
+		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+				(void **)gang, cur >> fs_info->sectorsize_bits,
+				min_t(unsigned int, GANG_LOOKUP_SIZE,
+				      PAGE_SIZE / fs_info->nodesize));
+		if (ret == 0)
+			goto out;
+		for (i = 0; i < ret; i++) {
+			/* Already beyond page end */
+			if (gang[i]->start >= page_start + PAGE_SIZE)
+				goto out;
+			/* Found one */
+			if (gang[i]->start >= bytenr) {
+				found = gang[i];
+				break;
+			}
 		}
+		cur = gang[ret - 1]->start + gang[ret - 1]->len;
 	}
+out:
 	return found;
 }
 
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 014256d47beb..9d6da9f2d77e 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -139,10 +139,14 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
 			struct btrfs_subpage **ret,
 			enum btrfs_subpage_type type)
 {
+	unsigned int real_size;
+
 	if (fs_info->sectorsize == PAGE_SIZE)
 		return 0;
 
-	*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
+	real_size = BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits) *
+		    sizeof(unsigned long) + sizeof(struct btrfs_subpage);
+	*ret = kzalloc(real_size, GFP_NOFS);
 	if (!*ret)
 		return -ENOMEM;
 	spin_lock_init(&(*ret)->lock);
@@ -324,37 +328,60 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
 		unlock_page(page);
 }
 
-/*
- * Convert the [start, start + len) range into a u16 bitmap
- *
- * For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
- */
-static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
-		struct page *page, u64 start, u32 len)
+static bool bitmap_test_range_all_set(unsigned long *addr, unsigned start,
+				      unsigned int nbits)
 {
-	const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
-	const int nbits = len >> fs_info->sectorsize_bits;
+	unsigned int found_zero;
 
-	btrfs_subpage_assert(fs_info, page, start, len);
+	found_zero = find_next_zero_bit(addr, start + nbits, start);
+	if (found_zero == start + nbits)
+		return true;
+	return false;
+}
 
-	/*
-	 * Here nbits can be 16, thus can go beyond u16 range. We make the
-	 * first left shift to be calculate in unsigned long (at least u32),
-	 * then truncate the result to u16.
-	 */
-	return (u16)(((1UL << nbits) - 1) << bit_start);
+static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned start,
+				       unsigned int nbits)
+{
+	unsigned int found_set;
+
+	found_set = find_next_bit(addr, start + nbits, start);
+	if (found_set == start + nbits)
+		return true;
+	return false;
 }
 
+#define subpage_calc_start_bit(fs_info, page, name, start, len)		\
+({									\
+	unsigned int start_bit;						\
+									\
+	btrfs_subpage_assert(fs_info, page, start, len);		\
+	start_bit = offset_in_page(start) >> fs_info->sectorsize_bits;	\
+	start_bit += fs_info->subpage_info->name##_start;		\
+	start_bit;							\
+})
+
+#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
+ 	bitmap_test_range_all_set(subpage->bitmaps,			\
+			fs_info->subpage_info->name##_start,		\
+			fs_info->subpage_info->bitmap_nr_bits)
+
+#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
+ 	bitmap_test_range_all_zero(subpage->bitmaps,			\
+			fs_info->subpage_info->name##_start,		\
+			fs_info->subpage_info->bitmap_nr_bits)
+
 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							uptodate, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->uptodate_bitmap |= tmp;
-	if (subpage->uptodate_bitmap == U16_MAX)
+	bitmap_set(subpage->bitmaps, start_bit,
+		   len >> fs_info->sectorsize_bits);
+	if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
 		SetPageUptodate(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -363,11 +390,13 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							uptodate, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->uptodate_bitmap &= ~tmp;
+	bitmap_clear(subpage->bitmaps, start_bit,
+		     len >> fs_info->sectorsize_bits);
 	ClearPageUptodate(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -376,11 +405,13 @@ void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							error, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->error_bitmap |= tmp;
+	bitmap_set(subpage->bitmaps, start_bit,
+		   len >> fs_info->sectorsize_bits);
 	SetPageError(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -389,12 +420,14 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							error, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->error_bitmap &= ~tmp;
-	if (subpage->error_bitmap == 0)
+	bitmap_clear(subpage->bitmaps, start_bit,
+		     len >> fs_info->sectorsize_bits);
+	if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
 		ClearPageError(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -403,11 +436,13 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							dirty, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->dirty_bitmap |= tmp;
+	bitmap_set(subpage->bitmaps, start_bit,
+		   len >> fs_info->sectorsize_bits);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 	set_page_dirty(page);
 }
@@ -426,13 +461,15 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							dirty, start, len);
 	unsigned long flags;
 	bool last = false;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->dirty_bitmap &= ~tmp;
-	if (subpage->dirty_bitmap == 0)
+	bitmap_clear(subpage->bitmaps, start_bit,
+		     len >> fs_info->sectorsize_bits);
+	if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) 
 		last = true;
 	spin_unlock_irqrestore(&subpage->lock, flags);
 	return last;
@@ -452,11 +489,13 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							writeback, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->writeback_bitmap |= tmp;
+	bitmap_set(subpage->bitmaps, start_bit,
+		   len >> fs_info->sectorsize_bits);
 	set_page_writeback(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -465,12 +504,14 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							writeback, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->writeback_bitmap &= ~tmp;
-	if (subpage->writeback_bitmap == 0) {
+	bitmap_clear(subpage->bitmaps, start_bit,
+		     len >> fs_info->sectorsize_bits);
+	if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
 		ASSERT(PageWriteback(page));
 		end_page_writeback(page);
 	}
@@ -481,11 +522,13 @@ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							ordered, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->ordered_bitmap |= tmp;
+	bitmap_set(subpage->bitmaps, start_bit,
+		   len >> fs_info->sectorsize_bits);
 	SetPageOrdered(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -494,12 +537,14 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len)
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+							ordered, start, len);
 	unsigned long flags;
 
 	spin_lock_irqsave(&subpage->lock, flags);
-	subpage->ordered_bitmap &= ~tmp;
-	if (subpage->ordered_bitmap == 0)
+	bitmap_clear(subpage->bitmaps, start_bit,
+		     len >> fs_info->sectorsize_bits);
+	if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
 		ClearPageOrdered(page);
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -512,12 +557,14 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
 		struct page *page, u64 start, u32 len)			\
 {									\
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
-	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); \
+	unsigned int start_bit = subpage_calc_start_bit(fs_info, page,	\
+						name, start, len);	\
 	unsigned long flags;						\
 	bool ret;							\
 									\
 	spin_lock_irqsave(&subpage->lock, flags);			\
-	ret = ((subpage->name##_bitmap & tmp) == tmp);			\
+	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, 	\
+				len >> fs_info->sectorsize_bits);	\
 	spin_unlock_irqrestore(&subpage->lock, flags);			\
 	return ret;							\
 }
@@ -610,5 +657,5 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
 		return;
 
 	ASSERT(PagePrivate(page) && page->private);
-	ASSERT(subpage->dirty_bitmap == 0);
+	ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
 }
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index ea90ba42c97b..6276947c4020 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -5,12 +5,6 @@
 
 #include <linux/spinlock.h>
 
-/*
- * Maximum page size we support is 64K, minimum sector size is 4K, u16 bitmap
- * is sufficient. Regular bitmap_* is not used due to size reasons.
- */
-#define BTRFS_SUBPAGE_BITMAP_SIZE	16
-
 /*
  * Extra info for subpapge bitmap.
  *
@@ -44,10 +38,6 @@ struct btrfs_subpage_info {
 struct btrfs_subpage {
 	/* Common members for both data and metadata pages */
 	spinlock_t lock;
-	u16 uptodate_bitmap;
-	u16 error_bitmap;
-	u16 dirty_bitmap;
-	u16 writeback_bitmap;
 	/*
 	 * Both data and metadata needs to track how many readers are for the
 	 * page.
@@ -64,14 +54,11 @@ struct btrfs_subpage {
 		 * manages whether the subpage can be detached.
 		 */
 		atomic_t eb_refs;
-		/* Structures only used by data */
-		struct {
-			atomic_t writers;
 
-			/* Tracke pending ordered extent in this sector */
-			u16 ordered_bitmap;
-		};
+		/* Structures only used by data */
+		atomic_t writers;
 	};
+	unsigned long bitmaps[];
 };
 
 enum btrfs_subpage_type {
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info
  2021-08-16  6:00 ` [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info Qu Wenruo
@ 2021-08-16  9:28   ` Nikolay Borisov
  2021-08-16 10:12     ` Qu Wenruo
  0 siblings, 1 reply; 10+ messages in thread
From: Nikolay Borisov @ 2021-08-16  9:28 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs



On 16.08.21 г. 9:00, Qu Wenruo wrote:
> Currently we use fixed size u16 bitmap for subpage bitmap.
> This is fine for 4K sectorsize with 64K page size.
> 
> But for 4K sectorsize and larger page size, the bitmap is too small,
> while for smaller page size like 16K, u16 bitmaps waste too much space.
> 
> Here we introduce a new helper structure, btrfs_subpage_bitmap_info, to
> record the proper bitmap size, and where each bitmap should start at.
> 
> By this, we can later compact all subpage bitmaps into one u32 bitmap.
> 
> This patch is the first step towards such compact bitmap.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/ctree.h   |  1 +
>  fs/btrfs/disk-io.c | 12 ++++++++++--
>  fs/btrfs/subpage.c | 35 +++++++++++++++++++++++++++++++++++
>  fs/btrfs/subpage.h | 28 ++++++++++++++++++++++++++++
>  4 files changed, 74 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 4a69aa604ac5..a98fd6a24113 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -898,6 +898,7 @@ struct btrfs_fs_info {
>  	struct btrfs_workqueue *scrub_workers;
>  	struct btrfs_workqueue *scrub_wr_completion_workers;
>  	struct btrfs_workqueue *scrub_parity_workers;
> +	struct btrfs_subpage_info *subpage_info;
>  
>  	struct btrfs_discard_ctl discard_ctl;
>  
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 1510a9d92858..f35b875f9e53 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
>  	btrfs_extent_buffer_leak_debug_check(fs_info);
>  	kfree(fs_info->super_copy);
>  	kfree(fs_info->super_for_commit);
> +	kfree(fs_info->subpage_info);
>  	kvfree(fs_info);
>  }
>  
> @@ -3393,11 +3394,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  	}
>  
>  	if (sectorsize != PAGE_SIZE) {
> +		struct btrfs_subpage_info *subpage_info;
> +
> +		ASSERT(sectorsize < PAGE_SIZE);

nit: Simply make the check sectorsize < PAGE_SIZE and that renders the
assert redundant.

> +
>  		btrfs_warn(fs_info,
>  	"read-write for sector size %u with page size %lu is experimental",
>  			   sectorsize, PAGE_SIZE);
> -	}
> -	if (sectorsize != PAGE_SIZE) {
>  		if (btrfs_super_incompat_flags(fs_info->super_copy) &
>  			BTRFS_FEATURE_INCOMPAT_RAID56) {
>  			btrfs_err(fs_info,
> @@ -3406,6 +3409,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  			err = -EINVAL;
>  			goto fail_alloc;
>  		}
> +		subpage_info = kzalloc(sizeof(*subpage_info), GFP_NOFS);
> +		if (!subpage_info)
> +			goto fail_alloc;
> +		btrfs_init_subpage_info(subpage_info, sectorsize);
> +		fs_info->subpage_info = subpage_info;
>  	}
>  
>  	ret = btrfs_init_workqueues(fs_info, fs_devices);
> diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
> index a61aa33aeeee..014256d47beb 100644
> --- a/fs/btrfs/subpage.c
> +++ b/fs/btrfs/subpage.c
> @@ -63,6 +63,41 @@
>   *   This means a slightly higher tree locking latency.
>   */
>  
> +void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info,
> +			     u32 sectorsize)
> +{
> +	unsigned int cur = 0;
> +	unsigned int nr_bits;
> +
> +	/*
> +	 * Just in case we have super large PAGE_SIZE that unsigned int is not
> +	 * enough to contain the number of sectors for the minimal sectorsize.
> +	 */
> +	BUILD_BUG_ON(UINT_MAX * SZ_4K < PAGE_SIZE);
> +
> +	ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));
> +
> +	nr_bits = PAGE_SIZE / sectorsize;
> +	subpage_info->bitmap_nr_bits = nr_bits;
> +
> +	subpage_info->uptodate_start = cur;
> +	cur += nr_bits;
> +
> +	subpage_info->error_start = cur;
> +	cur += nr_bits;
> +
> +	subpage_info->dirty_start = cur;
> +	cur += nr_bits;
> +
> +	subpage_info->writeback_start = cur;
> +	cur += nr_bits;
> +
> +	subpage_info->ordered_start = cur;
> +	cur += nr_bits;
> +
> +	subpage_info->total_nr_bits = cur;

So those values are really consts, however due to them being allocated
on the heap you can't simply define them as const and initialize them.
What a bummer...


On the other hand the namings are a bit generic, those are really
offsets into subpage->bitmaps. As such I'd prefer something along the
lines of
writeback_bitmap_offset
ordered_bitmap_offset etc.

Also I believe a graphical representation is in order i.e


[u][u][u][u][e][e][e][e][e]
^			  ^
|-uptodate_start	  |- error_start etc

Since it's a bit unexpected to have multiple, logically independent
bitmaps be tracked in the same physical location.
> +}
> +
>  int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
>  			 struct page *page, enum btrfs_subpage_type type)
>  {
> diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
> index 9aa40d795ba9..ea90ba42c97b 100644
> --- a/fs/btrfs/subpage.h
> +++ b/fs/btrfs/subpage.h
> @@ -11,6 +11,32 @@
>   */
>  #define BTRFS_SUBPAGE_BITMAP_SIZE	16
>  
> +/*
> + * Extra info for subpapge bitmap.
> + *
> + * For subpage we integrate all uptodate/error/dirty/writeback/ordered
> + * bitmaps into one larger bitmap.
> + * This structure records the basic info.
> + */
> +struct btrfs_subpage_info {
> +	/* Number of bits for each bitmap*/
> +	unsigned int bitmap_nr_bits;
> +
> +	/* Total number of bits for the whole bitmap */
> +	unsigned int total_nr_bits;
> +
> +	/*
> +	 * *_start indicates where the bitmap starts, the length
> +	 * is always @bitmap_size, which is calculated from
> +	 * PAGE_SIZE / sectorsize.
> +	 */
> +	unsigned int uptodate_start;
> +	unsigned int error_start;
> +	unsigned int dirty_start;
> +	unsigned int writeback_start;
> +	unsigned int ordered_start;
> +};
> +
>  /*
>   * Structure to trace status of each sector inside a page, attached to
>   * page::private for both data and metadata inodes.
> @@ -53,6 +79,8 @@ enum btrfs_subpage_type {
>  	BTRFS_SUBPAGE_DATA,
>  };
>  
> +void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info,
> +			     u32 sectorsize);
>  int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
>  			 struct page *page, enum btrfs_subpage_type type);
>  void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info
  2021-08-16  9:28   ` Nikolay Borisov
@ 2021-08-16 10:12     ` Qu Wenruo
  2021-08-16 10:17       ` David Sterba
  0 siblings, 1 reply; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16 10:12 UTC (permalink / raw)
  To: Nikolay Borisov, Qu Wenruo, linux-btrfs



On 2021/8/16 下午5:28, Nikolay Borisov wrote:
>
>
[...]
>>
>>   	if (sectorsize != PAGE_SIZE) {
>> +		struct btrfs_subpage_info *subpage_info;
>> +
>> +		ASSERT(sectorsize < PAGE_SIZE);
>
> nit: Simply make the check sectorsize < PAGE_SIZE and that renders the
> assert redundant.

I guess that's the way to go.

Since I don't believe we will ever support multi-page sectorsize, we
don't need to bother with sectorsize > PAGE_SIZE.
Thus we can forget the sectorsize > PAGE_SIZE case.

[...]
>> +
>> +	subpage_info->total_nr_bits = cur;
>
> So those values are really consts, however due to them being allocated
> on the heap you can't simply define them as const and initialize them.
> What a bummer...

Yeah, that's the limit of C.

>
>
> On the other hand the namings are a bit generic, those are really
> offsets into subpage->bitmaps. As such I'd prefer something along the
> lines of
> writeback_bitmap_offset
> ordered_bitmap_offset etc.

The *_bitmap_offset naming seems a little long, especially since in the
2nd patch we're already introducing too many new lines just to handle
the *_start naming.

Especially all these members are used with thing like
fs_info->subpage_info->uptodate_start.

What about the name "uptodate_offset"?

>
> Also I believe a graphical representation is in order i.e
>
>
> [u][u][u][u][e][e][e][e][e]
> ^			  ^
> |-uptodate_start	  |- error_start etc

That looks awesome.

>
> Since it's a bit unexpected to have multiple, logically independent
> bitmaps be tracked in the same physical location.

That's also what I'm concerned about.

I haven't seen other code sites doing the same thing.
iomap just wastes memory by going with a full u32 bitmap even for cases
like 16K page size and 4K sectorsize, which is exactly what I want to avoid.

Thanks,
Qu

>> +}
>> +
>>   int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
>>   			 struct page *page, enum btrfs_subpage_type type)
>>   {
>> diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
>> index 9aa40d795ba9..ea90ba42c97b 100644
>> --- a/fs/btrfs/subpage.h
>> +++ b/fs/btrfs/subpage.h
>> @@ -11,6 +11,32 @@
>>    */
>>   #define BTRFS_SUBPAGE_BITMAP_SIZE	16
>>
>> +/*
>> + * Extra info for subpapge bitmap.
>> + *
>> + * For subpage we integrate all uptodate/error/dirty/writeback/ordered
>> + * bitmaps into one larger bitmap.
>> + * This structure records the basic info.
>> + */
>> +struct btrfs_subpage_info {
>> +	/* Number of bits for each bitmap*/
>> +	unsigned int bitmap_nr_bits;
>> +
>> +	/* Total number of bits for the whole bitmap */
>> +	unsigned int total_nr_bits;
>> +
>> +	/*
>> +	 * *_start indicates where the bitmap starts, the length
>> +	 * is always @bitmap_size, which is calculated from
>> +	 * PAGE_SIZE / sectorsize.
>> +	 */
>> +	unsigned int uptodate_start;
>> +	unsigned int error_start;
>> +	unsigned int dirty_start;
>> +	unsigned int writeback_start;
>> +	unsigned int ordered_start;
>> +};
>> +
>>   /*
>>    * Structure to trace status of each sector inside a page, attached to
>>    * page::private for both data and metadata inodes.
>> @@ -53,6 +79,8 @@ enum btrfs_subpage_type {
>>   	BTRFS_SUBPAGE_DATA,
>>   };
>>
>> +void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info,
>> +			     u32 sectorsize);
>>   int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
>>   			 struct page *page, enum btrfs_subpage_type type);
>>   void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info
  2021-08-16 10:12     ` Qu Wenruo
@ 2021-08-16 10:17       ` David Sterba
  0 siblings, 0 replies; 10+ messages in thread
From: David Sterba @ 2021-08-16 10:17 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: Nikolay Borisov, Qu Wenruo, linux-btrfs

On Mon, Aug 16, 2021 at 06:12:16PM +0800, Qu Wenruo wrote:
> > Also I believe a graphical representation is in order i.e
> >
> > [u][u][u][u][e][e][e][e][e]
> > ^			  ^
> > |-uptodate_start	  |- error_start etc
> 
> That looks awesome.
> 
> > Since it's a bit unexpected to have multiple, logically independent
> > bitmaps be tracked in the same physical location.
> 
> That's also I'm concerning of.
> 
> I haven't seen other code sides doing the same behavior.
> IOmap just waste all the memory by going full u32 bitmap even for case
> like 16K page size and 4K sectorsize, exactly I want to avoid.

Yeah, it is mixing independent things in one structure, but that is to
decrease the waste and memory consumption. The subpage is attached to
each page, that come in large numbers so this is a justified
optimization, even if it's not "conceptually clean".

In the first implementation there were 4 bitmaps, so we see how things
work without the optimization. The switch to one bitmap was about to
happen once we get things working.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
  2021-08-16  6:00 ` [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
@ 2021-08-16 10:26   ` Nikolay Borisov
  2021-08-16 13:41     ` Qu Wenruo
  0 siblings, 1 reply; 10+ messages in thread
From: Nikolay Borisov @ 2021-08-16 10:26 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs



On 16.08.21 г. 9:00, Qu Wenruo wrote:
> Currently we use u16 bitmap to make 4k sectorsize work for 64K page
> size.
> 
> But this u16 bitmap is not large enough to contain larger page size like
> 128K, nor is space efficient for 16K page size.
> 
> To handle both cases, here we pack all subpage bitmaps into a larger
> bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
> subpage usage.
> 
> Each sub-bitmap will has its start bit number recorded in
> btrfs_subpage_info::*_start, and its bitmap length will be recorded in
> btrfs_subpage_info::bitmap_nr_bits.
> 
> All subpage bitmap operations will be converted from using direct u16
> operations to bitmap operations, with above *_start calculated.
> 
> For 64K page size with 4K sectorsize, this should not cause much difference.
> While for 16K page size, we will only need 1 unsigned long (u32) to
> restore the bitmap.
> And will be able to support 128K page size in the future.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/extent_io.c |  58 ++++++++++--------
>  fs/btrfs/subpage.c   | 137 +++++++++++++++++++++++++++++--------------
>  fs/btrfs/subpage.h   |  19 +-----
>  3 files changed, 130 insertions(+), 84 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 543f87ea372e..e428d6208bb7 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3865,10 +3865,9 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
>  	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
>  	u64 orig_start = *start;
>  	/* Declare as unsigned long so we can use bitmap ops */
> -	unsigned long dirty_bitmap;
>  	unsigned long flags;
> -	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
> -	int range_start_bit = nbits;
> +	int nbits = offset_in_page(orig_start) >> fs_info->sectorsize_bits;

nbits is rather dubious, by reading it I'd expect it to hold a
count/number of bits. In fact it holds a sector index. A better name
would be sector_index or block_index.

> +	int range_start_bit = nbits + fs_info->subpage_info->dirty_start;
>  	int range_end_bit;
>  
>  	/*
> @@ -3883,11 +3882,14 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
>  
>  	/* We should have the page locked, but just in case */
>  	spin_lock_irqsave(&subpage->lock, flags);
> -	dirty_bitmap = subpage->dirty_bitmap;
> +	bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
> +			       fs_info->subpage_info->dirty_start +
> +			       fs_info->subpage_info->bitmap_nr_bits);
>  	spin_unlock_irqrestore(&subpage->lock, flags);
>  
> -	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
> -			       BTRFS_SUBPAGE_BITMAP_SIZE);
> +	range_start_bit -= fs_info->subpage_info->dirty_start;
> +	range_end_bit -= fs_info->subpage_info->dirty_start;
> +
>  	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
>  	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
>  }
> @@ -4613,7 +4615,7 @@ static int submit_eb_subpage(struct page *page,
>  	int submitted = 0;
>  	u64 page_start = page_offset(page);
>  	int bit_start = 0;
> -	const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
> +	const int nbits = fs_info->subpage_info->bitmap_nr_bits;

nit: do we really need nbits, given that
fs_info->subpage_info->bitmap_nr_bits fits on one line inside the while
expression. Also see the discrepancy - nbits here indeed holds a "number
of bits value".

>  	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
>  	int ret;
>  

<snip>

> @@ -7178,32 +7181,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
>  	}
>  }
>  
> +#define GANG_LOOKUP_SIZE	16
>  static struct extent_buffer *get_next_extent_buffer(
>  		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
>  {
> -	struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
> +	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
>  	struct extent_buffer *found = NULL;
>  	u64 page_start = page_offset(page);
> -	int ret;
> -	int i;
> +	u64 cur = page_start;
>  
>  	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
> -	ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
>  	lockdep_assert_held(&fs_info->buffer_lock);
>  
> -	ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
> -			bytenr >> fs_info->sectorsize_bits,
> -			PAGE_SIZE / fs_info->nodesize);
> -	for (i = 0; i < ret; i++) {
> -		/* Already beyond page end */
> -		if (gang[i]->start >= page_start + PAGE_SIZE)
> -			break;
> -		/* Found one */
> -		if (gang[i]->start >= bytenr) {
> -			found = gang[i];
> -			break;
> +	while (cur < page_start + PAGE_SIZE) {
> +		int ret;
> +		int i;
> +
> +		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
> +				(void **)gang, cur >> fs_info->sectorsize_bits,
> +				min_t(unsigned int, GANG_LOOKUP_SIZE,
> +				      PAGE_SIZE / fs_info->nodesize));
> +		if (ret == 0)
> +			goto out;
> +		for (i = 0; i < ret; i++) {
> +			/* Already beyond page end */
> +			if (gang[i]->start >= page_start + PAGE_SIZE)
> +				goto out;
> +			/* Found one */
> +			if (gang[i]->start >= bytenr) {
> +				found = gang[i];
> +				break;

Instead of break, don't you want to do goto out? Break would exit from
the inner for() loop and continue at the outer while loop.

> +			}
>  		}
> +		cur = gang[ret - 1]->start + gang[ret - 1]->len;
>  	}
> +out:
>  	return found;
>  }
>  
> diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
> index 014256d47beb..9d6da9f2d77e 100644
> --- a/fs/btrfs/subpage.c
> +++ b/fs/btrfs/subpage.c
> @@ -139,10 +139,14 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
>  			struct btrfs_subpage **ret,
>  			enum btrfs_subpage_type type)
>  {
> +	unsigned int real_size;
> +
>  	if (fs_info->sectorsize == PAGE_SIZE)
>  		return 0;

offtopic: Instead of having a bunch of those checks can't we replace
them with ASSERTS and ensure that the decision whether we do subpage or
not is taken at a higher level ?
>  
> -	*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
> +	real_size = BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits) *
> +		    sizeof(unsigned long) + sizeof(struct btrfs_subpage);

real_size = struct_size(*ret, bitmaps,
BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));

And the calling convention for this function is awful. Just make it
return struct btrfs_subpage * and kill the dreadful **ret....

> +	*ret = kzalloc(real_size, GFP_NOFS);
>  	if (!*ret)
>  		return -ENOMEM;
>  	spin_lock_init(&(*ret)->lock);
> @@ -324,37 +328,60 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
>  		unlock_page(page);
>  }
>  
> -/*
> - * Convert the [start, start + len) range into a u16 bitmap
> - *
> - * For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
> - */
> -static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
> -		struct page *page, u64 start, u32 len)
> +static bool bitmap_test_range_all_set(unsigned long *addr, unsigned start,
> +				      unsigned int nbits)
>  {
> -	const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
> -	const int nbits = len >> fs_info->sectorsize_bits;
> +	unsigned int found_zero;
>  
> -	btrfs_subpage_assert(fs_info, page, start, len);
> +	found_zero = find_next_zero_bit(addr, start + nbits, start);

The 2nd argument of find_next_zero_bit is 'size', which would be nbits, as
it expects the size to be in bits, not start + nbits. Every logical bitmap
in ->bitmaps is defined by [start, nbits], no? Unfortunately there is a
discrepancy between the order of documentation and the order of actual
arguments in the definition of this function....

> +	if (found_zero == start + nbits)
> +		return true;
> +	return false;
> +}
>  
> -	/*
> -	 * Here nbits can be 16, thus can go beyond u16 range. We make the
> -	 * first left shift to be calculate in unsigned long (at least u32),
> -	 * then truncate the result to u16.
> -	 */
> -	return (u16)(((1UL << nbits) - 1) << bit_start);
> +static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned start,
> +				       unsigned int nbits)
> +{
> +	unsigned int found_set;
> +
> +	found_set = find_next_bit(addr, start + nbits, start);
> +	if (found_set == start + nbits)
> +		return true;
> +	return false;

This can be implemented by simply doing !bitmap_test_range_all_set; no
need to have 2 separate implementations. Given those 2 functions are
only used in the definition of the subpage_test_bitmap* macros, I'd say
remove one of them and simply code the macros as
bitmap_test_range_all_set and !bitmap_test_range_all_set respectively.

>  }
>  

<snip>


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
  2021-08-16 10:26   ` Nikolay Borisov
@ 2021-08-16 13:41     ` Qu Wenruo
  2021-08-16 14:27       ` Nikolay Borisov
  0 siblings, 1 reply; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16 13:41 UTC (permalink / raw)
  To: Nikolay Borisov, Qu Wenruo, linux-btrfs



On 2021/8/16 下午6:26, Nikolay Borisov wrote:
>
>
> On 16.08.21 г. 9:00, Qu Wenruo wrote:
>> Currently we use u16 bitmap to make 4k sectorsize work for 64K page
>> size.
>>
>> But this u16 bitmap is not large enough to contain larger page size like
>> 128K, nor is space efficient for 16K page size.
>>
>> To handle both cases, here we pack all subpage bitmaps into a larger
>> bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
>> subpage usage.
>>
>> Each sub-bitmap will has its start bit number recorded in
>> btrfs_subpage_info::*_start, and its bitmap length will be recorded in
>> btrfs_subpage_info::bitmap_nr_bits.
>>
>> All subpage bitmap operations will be converted from using direct u16
>> operations to bitmap operations, with above *_start calculated.
>>
>> For 64K page size with 4K sectorsize, this should not cause much difference.
>> While for 16K page size, we will only need 1 unsigned long (u32) to
>> restore the bitmap.
>> And will be able to support 128K page size in the future.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>   fs/btrfs/extent_io.c |  58 ++++++++++--------
>>   fs/btrfs/subpage.c   | 137 +++++++++++++++++++++++++++++--------------
>>   fs/btrfs/subpage.h   |  19 +-----
>>   3 files changed, 130 insertions(+), 84 deletions(-)
>>
>> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
>> index 543f87ea372e..e428d6208bb7 100644
>> --- a/fs/btrfs/extent_io.c
>> +++ b/fs/btrfs/extent_io.c
>> @@ -3865,10 +3865,9 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
>>   	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
>>   	u64 orig_start = *start;
>>   	/* Declare as unsigned long so we can use bitmap ops */
>> -	unsigned long dirty_bitmap;
>>   	unsigned long flags;
>> -	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
>> -	int range_start_bit = nbits;
>> +	int nbits = offset_in_page(orig_start) >> fs_info->sectorsize_bits;
>
> nbits is rather dubious, by reading it I'd expect it to hold a
> count/number of bits. In fact it holds a sector index. A better name
> would be sector_index or block_index.

Not sure if "sector" in the context of bitmap is preferred.

But since nbits is only used once, deleting nbits completely would be
better.

>
>> +	int range_start_bit = nbits + fs_info->subpage_info->dirty_start;
>>   	int range_end_bit;
>>
>>   	/*
>> @@ -3883,11 +3882,14 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
>>
>>   	/* We should have the page locked, but just in case */
>>   	spin_lock_irqsave(&subpage->lock, flags);
>> -	dirty_bitmap = subpage->dirty_bitmap;
>> +	bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
>> +			       fs_info->subpage_info->dirty_start +
>> +			       fs_info->subpage_info->bitmap_nr_bits);
>>   	spin_unlock_irqrestore(&subpage->lock, flags);
>>
>> -	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
>> -			       BTRFS_SUBPAGE_BITMAP_SIZE);
>> +	range_start_bit -= fs_info->subpage_info->dirty_start;
>> +	range_end_bit -= fs_info->subpage_info->dirty_start;
>> +
>>   	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
>>   	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
>>   }
>> @@ -4613,7 +4615,7 @@ static int submit_eb_subpage(struct page *page,
>>   	int submitted = 0;
>>   	u64 page_start = page_offset(page);
>>   	int bit_start = 0;
>> -	const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
>> +	const int nbits = fs_info->subpage_info->bitmap_nr_bits;
>
> nit: do we really need nbits, given that
> fs_info->subpage_info->bitmap_nr_bits fits on one line inside the while
> expression. Also see the discrepancy - nbits here indeed holds a "number
> of bits value".

Yeah, deleting nbits completely here would be better.

>
>>   	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
>>   	int ret;
>>
>
> <snip>
>
>> @@ -7178,32 +7181,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
>>   	}
>>   }
>>
>> +#define GANG_LOOKUP_SIZE	16
>>   static struct extent_buffer *get_next_extent_buffer(
>>   		struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
>>   {
>> -	struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
>> +	struct extent_buffer *gang[GANG_LOOKUP_SIZE];
>>   	struct extent_buffer *found = NULL;
>>   	u64 page_start = page_offset(page);
>> -	int ret;
>> -	int i;
>> +	u64 cur = page_start;
>>
>>   	ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
>> -	ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
>>   	lockdep_assert_held(&fs_info->buffer_lock);
>>
>> -	ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
>> -			bytenr >> fs_info->sectorsize_bits,
>> -			PAGE_SIZE / fs_info->nodesize);
>> -	for (i = 0; i < ret; i++) {
>> -		/* Already beyond page end */
>> -		if (gang[i]->start >= page_start + PAGE_SIZE)
>> -			break;
>> -		/* Found one */
>> -		if (gang[i]->start >= bytenr) {
>> -			found = gang[i];
>> -			break;
>> +	while (cur < page_start + PAGE_SIZE) {
>> +		int ret;
>> +		int i;
>> +
>> +		ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
>> +				(void **)gang, cur >> fs_info->sectorsize_bits,
>> +				min_t(unsigned int, GANG_LOOKUP_SIZE,
>> +				      PAGE_SIZE / fs_info->nodesize));
>> +		if (ret == 0)
>> +			goto out;
>> +		for (i = 0; i < ret; i++) {
>> +			/* Already beyond page end */
>> +			if (gang[i]->start >= page_start + PAGE_SIZE)
>> +				goto out;
>> +			/* Found one */
>> +			if (gang[i]->start >= bytenr) {
>> +				found = gang[i];
>> +				break;
>
> Instead of break, don't you want to do got out? Break would exit from
> the inner for() loop and continue at the outter while loop.

Damn, why didn't my subpage tests catch the problem...

I guess since the function is only called in
try_release_subpage_extent_buffer(), and it only needs to exhaust all
the ebs of the page, it doesn't really care about if we returned some
wrong eb.

But still, it should be "goto out".

Thanks for catching this one.

>
>> +			}
>>   		}
>> +		cur = gang[ret - 1]->start + gang[ret - 1]->len;
>>   	}
>> +out:
>>   	return found;
>>   }
>>
>> diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
>> index 014256d47beb..9d6da9f2d77e 100644
>> --- a/fs/btrfs/subpage.c
>> +++ b/fs/btrfs/subpage.c
>> @@ -139,10 +139,14 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
>>   			struct btrfs_subpage **ret,
>>   			enum btrfs_subpage_type type)
>>   {
>> +	unsigned int real_size;
>> +
>>   	if (fs_info->sectorsize == PAGE_SIZE)
>>   		return 0;
>
> offtopic: Instead of having a bunch of those checks can't we replace
> them with ASSERTS and ensure that the decision whether we do subpage or
> not is taken at a higher level ?

Nope, in this particular call site, btrfs_alloc_subpage() can be called
with regular page size.

I guess it's better to rename this function to something like btrfs_prepare_page()?

>>
>> -	*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
>> +	real_size = BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits) *
>> +		    sizeof(unsigned long) + sizeof(struct btrfs_subpage);
>
> real_size = struct_size(*ret, bitmaps,
> BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
>
> And the calling convention for this function is awful. Just make it
> return struct btrfs_subpage * and kill the dreadful **ret....

Above two advices indeed sound much better.

>
>> +	*ret = kzalloc(real_size, GFP_NOFS);
>>   	if (!*ret)
>>   		return -ENOMEM;
>>   	spin_lock_init(&(*ret)->lock);
>> @@ -324,37 +328,60 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
>>   		unlock_page(page);
>>   }
>>
>> -/*
>> - * Convert the [start, start + len) range into a u16 bitmap
>> - *
>> - * For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
>> - */
>> -static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
>> -		struct page *page, u64 start, u32 len)
>> +static bool bitmap_test_range_all_set(unsigned long *addr, unsigned start,
>> +				      unsigned int nbits)
>>   {
>> -	const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
>> -	const int nbits = len >> fs_info->sectorsize_bits;
>> +	unsigned int found_zero;
>>
>> -	btrfs_subpage_assert(fs_info, page, start, len);
>> +	found_zero = find_next_zero_bit(addr, start + nbits, start);
>
> 2 argument of find_next_zero_bit is 'size' which would be nbits as it
> expects the size to be in bits , not start + nbit. Every logical bitmap
> in ->bitmaps is defined by [start, nbits] no ? Unfortunately there is a
> discrepancy between the order of documentation and the order of actual
> arguments in the definition of this function....

IT'S A TRAP!

Parameter 2 (@size) is the total size of the search range; it should be
larger than the 3rd parameter.

You can check the _find_next_bit(), where there are a lot of @start >=
@nbits checks.

I guess this is the tricky part when we pack the bitmaps into one larger
bitmap...

>
>> +	if (found_zero == start + nbits)
>> +		return true;
>> +	return false;
>> +}
>>
>> -	/*
>> -	 * Here nbits can be 16, thus can go beyond u16 range. We make the
>> -	 * first left shift to be calculate in unsigned long (at least u32),
>> -	 * then truncate the result to u16.
>> -	 */
>> -	return (u16)(((1UL << nbits) - 1) << bit_start);
>> +static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned start,
>> +				       unsigned int nbits)
>> +{
>> +	unsigned int found_set;
>> +
>> +	found_set = find_next_bit(addr, start + nbits, start);
>> +	if (found_set == start + nbits)
>> +		return true;
>> +	return false;
>
> This can be implemented by simply doing !bitmap_test_range_all_set no
> need to have 2 separate implementations. Given those 2 functions are
> only used in the definition oof the subpage_test_bitmap* macros I'da say
> remove one of them and simply code the macros as
> bitmap_test_range_all_set and !bitmap_test_range_all_set respectively.

TRAP CARD!

bitmap_test_range_all_set() will only return 1 if all bits in the range
are 1.

But bitmap_test_range_all_zero() should only return 1 if all bits in the
range are 0.

What if only part of the range is 1?

Thanks,
Qu


>
>>   }
>>
>
> <snip>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
  2021-08-16 13:41     ` Qu Wenruo
@ 2021-08-16 14:27       ` Nikolay Borisov
  2021-08-16 23:18         ` Qu Wenruo
  0 siblings, 1 reply; 10+ messages in thread
From: Nikolay Borisov @ 2021-08-16 14:27 UTC (permalink / raw)
  To: Qu Wenruo, Qu Wenruo, linux-btrfs



On 16.08.21 г. 16:41, Qu Wenruo wrote:
> 
> 
> On 2021/8/16 下午6:26, Nikolay Borisov wrote:
>>
>>
>> On 16.08.21 г. 9:00, Qu Wenruo wrote:
>>> Currently we use u16 bitmap to make 4k sectorsize work for 64K page
>>> size.
>>>
>>> But this u16 bitmap is not large enough to contain larger page size like
>>> 128K, nor is space efficient for 16K page size.
>>>
>>> To handle both cases, here we pack all subpage bitmaps into a larger
>>> bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
>>> subpage usage.
>>>
>>> Each sub-bitmap will has its start bit number recorded in
>>> btrfs_subpage_info::*_start, and its bitmap length will be recorded in
>>> btrfs_subpage_info::bitmap_nr_bits.
>>>
>>> All subpage bitmap operations will be converted from using direct u16
>>> operations to bitmap operations, with above *_start calculated.
>>>
>>> For 64K page size with 4K sectorsize, this should not cause much
>>> difference.
>>> While for 16K page size, we will only need 1 unsigned long (u32) to
>>> restore the bitmap.
>>> And will be able to support 128K page size in the future.
>>>

<snip>

>>
>> offtopic: Instead of having a bunch of those checks can't we replace
>> them with ASSERTS and ensure that the decision whether we do subpage or
>> not is taken at a higher level ?
> 
> Nope, in this particular call site, btrfs_alloc_subpage() can be called
> with regular page size.
> 
> I guess it's better to rename this function like btrfs_prepare_page()?

There are currently 2 callers:

btrfs_attach_subpage - this one only calls it iff sectorsize != PAGE_SIZE

alloc_extent_buffer - here it's called unconditionally but that can
easily be rectified with an if(subpage) check

<snip>


>> 2 argument of find_next_zero_bit is 'size' which would be nbits as it
>> expects the size to be in bits , not start + nbit. Every logical bitmap
>> in ->bitmaps is defined by [start, nbits] no ? Unfortunately there is a
>> discrepancy between the order of documentation and the order of actual
>> arguments in the definition of this function....
> 
> IT'S A TRAP!
> 
> Paramater 2 (@size) is the total size of the search range, it should be
> larger than the 3rd parameter.

It's not even that — it's the end index of the bit we are looking for, i.e.
if we want to check bits 20-29 we'd pass 20 as start, and 29 as size ...
This is fucked, but it is what it is. I guess the documentation of the
bits function is dodgy...

<snip>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap
  2021-08-16 14:27       ` Nikolay Borisov
@ 2021-08-16 23:18         ` Qu Wenruo
  0 siblings, 0 replies; 10+ messages in thread
From: Qu Wenruo @ 2021-08-16 23:18 UTC (permalink / raw)
  To: Nikolay Borisov, Qu Wenruo, linux-btrfs



On 2021/8/16 下午10:27, Nikolay Borisov wrote:
>
>
> On 16.08.21 г. 16:41, Qu Wenruo wrote:
>>
>>
>> On 2021/8/16 下午6:26, Nikolay Borisov wrote:
>>>
>>>
>>> On 16.08.21 г. 9:00, Qu Wenruo wrote:
>>>> Currently we use u16 bitmap to make 4k sectorsize work for 64K page
>>>> size.
>>>>
>>>> But this u16 bitmap is not large enough to contain larger page size like
>>>> 128K, nor is space efficient for 16K page size.
>>>>
>>>> To handle both cases, here we pack all subpage bitmaps into a larger
>>>> bitmap, now btrfs_subpage::bitmaps[] will be the ultimate bitmap for
>>>> subpage usage.
>>>>
>>>> Each sub-bitmap will has its start bit number recorded in
>>>> btrfs_subpage_info::*_start, and its bitmap length will be recorded in
>>>> btrfs_subpage_info::bitmap_nr_bits.
>>>>
>>>> All subpage bitmap operations will be converted from using direct u16
>>>> operations to bitmap operations, with above *_start calculated.
>>>>
>>>> For 64K page size with 4K sectorsize, this should not cause much
>>>> difference.
>>>> While for 16K page size, we will only need 1 unsigned long (u32) to
>>>> restore the bitmap.
>>>> And will be able to support 128K page size in the future.
>>>>
>
> <snip>
>
>>>
>>> offtopic: Instead of having a bunch of those checks can't we replace
>>> them with ASSERTS and ensure that the decision whether we do subpage or
>>> not is taken at a higher level ?
>>
>> Nope, in this particular call site, btrfs_alloc_subpage() can be called
>> with regular page size.
>>
>> I guess it's better to rename this function like btrfs_prepare_page()?
>
> There are currently 2 callers:
>
> btrfs_attach_subpage - this one only calls it iff sectorsize != alloc_subage
>
> alloc_extent_buffer - here it's called unconditionally but that can
> easily be rectified with an if(subpage) check
>
> <snip>
>
>
>>> 2 argument of find_next_zero_bit is 'size' which would be nbits as it
>>> expects the size to be in bits , not start + nbit. Every logical bitmap
>>> in ->bitmaps is defined by [start, nbits] no ? Unfortunately there is a
>>> discrepancy between the order of documentation and the order of actual
>>> arguments in the definition of this function....
>>
>> IT'S A TRAP!
>>
>> Paramater 2 (@size) is the total size of the search range, it should be
>> larger than the 3rd parameter.
>
> It's not even that it's the end index of the bit we are looking for. I.e
> if we want to check bits 20-29 we'd pass 20 as start, and 29 as size ...
> This is fucked, but it is what it is. I guess the documentation of the
> bits function is dodgy...

Well, the size part is not correct IMHO.

Just a simple run could prove that:

number=0b0011100010111010101010001010011
find_next_zero_bit(&number, 3, 0)=2

In fact, if you pass 29 as size, it will not touch bit 29, and bit 28 is
the last bit to be touched.

As that function will return @size to indicate that there is no matching
bit found.

Thanks,
Qu

>
> <snip>
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-08-16 23:18 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-16  6:00 [PATCH 0/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
2021-08-16  6:00 ` [PATCH 1/2] btrfs: introduce btrfs_subpage_bitmap_info Qu Wenruo
2021-08-16  9:28   ` Nikolay Borisov
2021-08-16 10:12     ` Qu Wenruo
2021-08-16 10:17       ` David Sterba
2021-08-16  6:00 ` [PATCH 2/2] btrfs: subpage: pack all subpage bitmaps into a larger bitmap Qu Wenruo
2021-08-16 10:26   ` Nikolay Borisov
2021-08-16 13:41     ` Qu Wenruo
2021-08-16 14:27       ` Nikolay Borisov
2021-08-16 23:18         ` Qu Wenruo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.