linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize
@ 2010-07-13  8:55 Miao Xie
  2010-07-14  9:11 ` Miao Xie
  0 siblings, 1 reply; 2+ messages in thread
From: Miao Xie @ 2010-07-13  8:55 UTC (permalink / raw)
  To: Chris Mason; +Cc: Linux Btrfs

oops occured when we ran the following commands on the filesystem whose
leafsize is greater than its nodesize.
  # mkfs.btrfs -l 8192 /dev/sda1
                  ^^^^ 2 * PAGESIZE
  # mount /dev/sda1 /mnt
  # cat /dev/zero > /mnt/tmp_file0
  # umount /mnt
  Oops occured.
(Sometimes we must do the loop of mount/umount several times to hit this bug)

Oops infomation:
------------[ cut here ]------------
WARNING: at fs/btrfs/extent_io.c:3685 copy_extent_buffer+0x48/0x192()
Hardware name: FFFFFFFFFF
Modules linked in: [snip]
Pid: 3743, comm: umount Not tainted 2.6.35-rc4 #22
Call Trace:
  [<ffffffff8103782e>] warn_slowpath_common+0x80/0x98
  [<ffffffff8103785b>] warn_slowpath_null+0x15/0x17
  [<ffffffff811713a3>] copy_extent_buffer+0x48/0x192
  [<ffffffff81140488>] __btrfs_cow_block+0x16f/0x559
  [<ffffffff81140ee7>] btrfs_cow_block+0x189/0x1a6
  [<ffffffff81157cf0>] commit_cowonly_roots+0x54/0x193
  [<ffffffff811587d5>] btrfs_commit_transaction+0x380/0x610
  [<ffffffff81050a67>] ? autoremove_wake_function+0x0/0x34
  [<ffffffff811554bf>] btrfs_commit_super+0xa2/0xc3
  [<ffffffff8115551a>] close_ctree+0x3a/0x330
  [<ffffffff810545e8>] ? up_write+0x1e/0x36
  [<ffffffff810f4818>] ? invalidate_inodes+0x120/0x132
  [<ffffffff8113b233>] btrfs_put_super+0x18/0x27
  [<ffffffff810e395c>] generic_shutdown_super+0x51/0xd2
  [<ffffffff810e3a28>] kill_anon_super+0x11/0x4f
  [<ffffffff810e2af8>] deactivate_locked_super+0x21/0x41
  [<ffffffff810e2cd0>] deactivate_super+0x40/0x44
  [<ffffffff810f7a4b>] mntput_no_expire+0xb8/0xe5
  [<ffffffff810f7fea>] sys_umount+0x2c8/0x2f3
  [<ffffffff8106062a>] ? trace_hardirqs_on_caller+0x10c/0x130
  [<ffffffff81001f2b>] system_call_fastpath+0x16/0x1b
---[ end trace 5f7f4dbc8bcadbc3 ]---

The reason is that:
In order to reuse the extent buffer, the btrfs doesn't release the extent
buffer, When the btrfs releases its device space. So if the btrfs allocates
the device space that has been released just now, the btrfs will get the old
extent buffer mapping to the device space.

But if the length of the new space is greator than the old extent buffer, a
BUG() will be touched.

The patch fixes this problem by change the length of the old extent buffer.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
  fs/btrfs/extent_io.c |   90 +++++++++++++++++++++++++++++++++++++++++++++++--
  1 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 70b7cc5..b4f1c42 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3130,6 +3130,45 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
  	__free_extent_buffer(eb);
  }
  
+/*
+ * we may reuse a extent buffer whose device space has been released, if the len
+ * of the extent buffer is smaller than we expect, we must enlarge the extent
+ * buffer, and before doing that, we must release the extent buffer that
+ * intersects it.
+ *
+ * Don't worry about the state of the extent buffer that is going to be release.
+ * because it is just an image left in the memory, and its device space has been
+ * released, or the btrfs can't allocate its device space for other extent
+ * buffer.
+ *
+ * Note: Must hold io_tree->buffer_lock
+ */
+static int btrfs_enlarge_extent_buffer(struct extent_io_tree *tree,
+					struct extent_buffer *eb,
+					unsigned long newlen)
+{
+	struct rb_node *next;
+	struct extent_buffer *next_eb;
+
+	eb->len = newlen;
+	set_page_extent_head(eb->first_page, newlen);
+
+	next = rb_next(&eb->rb_node);
+	while (next) {
+		next_eb = rb_entry(next, struct extent_buffer, rb_node);
+		if (next_eb->start >= eb->start + eb->len)
+			break;
+
+		if (atomic_read(&next_eb->refs) > 1)
+			return 1;
+
+		next = rb_next(next);
+		rb_erase(&next_eb->rb_node, &tree->buffer);
+		btrfs_release_extent_buffer(next_eb);
+	}
+	return 0;
+}
+
  struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
  					  u64 start, unsigned long len,
  					  struct page *page0,
@@ -3147,10 +3186,49 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
  	spin_lock(&tree->buffer_lock);
  	eb = buffer_search(tree, start);
  	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
-		mark_page_accessed(eb->first_page);
-		return eb;
+		/*
+		 * If this extent buffer's device space has been released some
+		 * time ago, and is reallocated again to store other metadata,
+		 * but it hasn't been release, we may get the old entent buffer
+		 * and reuse it.
+		 *
+		 * But, we must change it according the new len.
+		 */
+		if (eb->len >= len) {
+			if (eb->len > len) {
+				btrfs_release_extent_buffer_page(eb, num_pages);
+
+				eb->len = len;
+				set_page_extent_head(eb->first_page, len);
+			}
+
+			atomic_inc(&eb->refs);
+			spin_unlock(&tree->buffer_lock);
+			mark_page_accessed(eb->first_page);
+			return eb;
+		} else {
+			int ret;
+
+			/*
+			 * if eb->len != len, it means this extent buffer
+			 * is reused as a new extent buffer.
+			 */
+			BUG_ON(atomic_read(&eb->refs) != 1);
+
+			i = num_extent_pages(eb->start, eb->len);
+			index += i;
+
+			ret = btrfs_enlarge_extent_buffer(tree, eb, len);
+			if (ret) {
+				spin_unlock(&tree->buffer_lock);
+				return NULL;
+			} else {
+				rb_erase(&eb->rb_node, &tree->buffer);
+				spin_unlock(&tree->buffer_lock);
+				mark_page_accessed(eb->first_page);
+				goto eb_alloc_pages;
+			}
+		}
  	}
  	spin_unlock(&tree->buffer_lock);
  
@@ -3170,6 +3248,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
  	} else {
  		i = 0;
  	}
+
+eb_alloc_pages:
  	for (; i < num_pages; i++, index++) {
  		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
  		if (!p) {
@@ -3197,6 +3277,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
  		/* add one reference for the caller */
  		atomic_inc(&exists->refs);
  		spin_unlock(&tree->buffer_lock);
+
+		BUG_ON(exists->len != eb->len);
  		goto free_eb;
  	}
  	/* add one reference for the tree */
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize
  2010-07-13  8:55 [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize Miao Xie
@ 2010-07-14  9:11 ` Miao Xie
  0 siblings, 0 replies; 2+ messages in thread
From: Miao Xie @ 2010-07-14  9:11 UTC (permalink / raw)
  To: Chris Mason; +Cc: Linux Btrfs

Hi, Chris

Could you review these patches for me? I have tested them and everything
works ok.

[PATCH 1/2] btrfs: restructure try_release_extent_buffer()
[PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize
[PATCH] btrfs-progs: fix wrong extent buffer size when reading tree block

Thanks
Miao Xie

On Tue Jul 13 2010 16:55:55 GMT+0800 (CST), Miao Xie wrote:
> oops occured when we ran the following commands on the filesystem whose
> leafsize is greater than its nodesize.
> # mkfs.btrfs -l 8192 /dev/sda1
> ^^^^ 2 * PAGESIZE
> # mount /dev/sda1 /mnt
> # cat /dev/zero > /mnt/tmp_file0
> # umount /mnt
> Oops occured.
> (Sometimes we must do the loop of mount/umount several times to hit this
> bug)
>
> Oops infomation:
> ------------[ cut here ]------------
> WARNING: at fs/btrfs/extent_io.c:3685 copy_extent_buffer+0x48/0x192()
> Hardware name: FFFFFFFFFF
> Modules linked in: [snip]
> Pid: 3743, comm: umount Not tainted 2.6.35-rc4 #22
> Call Trace:
> [<ffffffff8103782e>] warn_slowpath_common+0x80/0x98
> [<ffffffff8103785b>] warn_slowpath_null+0x15/0x17
> [<ffffffff811713a3>] copy_extent_buffer+0x48/0x192
> [<ffffffff81140488>] __btrfs_cow_block+0x16f/0x559
> [<ffffffff81140ee7>] btrfs_cow_block+0x189/0x1a6
> [<ffffffff81157cf0>] commit_cowonly_roots+0x54/0x193
> [<ffffffff811587d5>] btrfs_commit_transaction+0x380/0x610
> [<ffffffff81050a67>] ? autoremove_wake_function+0x0/0x34
> [<ffffffff811554bf>] btrfs_commit_super+0xa2/0xc3
> [<ffffffff8115551a>] close_ctree+0x3a/0x330
> [<ffffffff810545e8>] ? up_write+0x1e/0x36
> [<ffffffff810f4818>] ? invalidate_inodes+0x120/0x132
> [<ffffffff8113b233>] btrfs_put_super+0x18/0x27
> [<ffffffff810e395c>] generic_shutdown_super+0x51/0xd2
> [<ffffffff810e3a28>] kill_anon_super+0x11/0x4f
> [<ffffffff810e2af8>] deactivate_locked_super+0x21/0x41
> [<ffffffff810e2cd0>] deactivate_super+0x40/0x44
> [<ffffffff810f7a4b>] mntput_no_expire+0xb8/0xe5
> [<ffffffff810f7fea>] sys_umount+0x2c8/0x2f3
> [<ffffffff8106062a>] ? trace_hardirqs_on_caller+0x10c/0x130
> [<ffffffff81001f2b>] system_call_fastpath+0x16/0x1b
> ---[ end trace 5f7f4dbc8bcadbc3 ]---
>
> The reason is that:
> In order to reuse the extent buffer, the btrfs doesn't release the extent
> buffer, When the btrfs releases its device space. So if the btrfs allocates
> the device space that has been released just now, the btrfs will get the
> old
> extent buffer mapping to the device space.
>
> But if the length of the new space is greator than the old extent buffer, a
> BUG() will be touched.
>
> The patch fixes this problem by change the length of the old extent buffer.
>
> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
> ---
> fs/btrfs/extent_io.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++--
> 1 files changed, 86 insertions(+), 4 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 70b7cc5..b4f1c42 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -3130,6 +3130,45 @@ static inline void
> btrfs_release_extent_buffer(struct extent_buffer *eb)
> __free_extent_buffer(eb);
> }
>
> +/*
> + * we may reuse a extent buffer whose device space has been released,
> if the len
> + * of the extent buffer is smaller than we expect, we must enlarge the
> extent
> + * buffer, and before doing that, we must release the extent buffer that
> + * intersects it.
> + *
> + * Don't worry about the state of the extent buffer that is going to be
> release.
> + * because it is just an image left in the memory, and its device space
> has been
> + * released, or the btrfs can't allocate its device space for other extent
> + * buffer.
> + *
> + * Note: Must hold io_tree->buffer_lock
> + */
> +static int btrfs_enlarge_extent_buffer(struct extent_io_tree *tree,
> + struct extent_buffer *eb,
> + unsigned long newlen)
> +{
> + struct rb_node *next;
> + struct extent_buffer *next_eb;
> +
> + eb->len = newlen;
> + set_page_extent_head(eb->first_page, newlen);
> +
> + next = rb_next(&eb->rb_node);
> + while (next) {
> + next_eb = rb_entry(next, struct extent_buffer, rb_node);
> + if (next_eb->start >= eb->start + eb->len)
> + break;
> +
> + if (atomic_read(&next_eb->refs) > 1)
> + return 1;
> +
> + next = rb_next(next);
> + rb_erase(&next_eb->rb_node, &tree->buffer);
> + btrfs_release_extent_buffer(next_eb);
> + }
> + return 0;
> +}
> +
> struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
> u64 start, unsigned long len,
> struct page *page0,
> @@ -3147,10 +3186,49 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> spin_lock(&tree->buffer_lock);
> eb = buffer_search(tree, start);
> if (eb) {
> - atomic_inc(&eb->refs);
> - spin_unlock(&tree->buffer_lock);
> - mark_page_accessed(eb->first_page);
> - return eb;
> + /*
> + * If this extent buffer's device space has been released some
> + * time ago, and is reallocated again to store other metadata,
> + * but it hasn't been release, we may get the old entent buffer
> + * and reuse it.
> + *
> + * But, we must change it according the new len.
> + */
> + if (eb->len >= len) {
> + if (eb->len > len) {
> + btrfs_release_extent_buffer_page(eb, num_pages);
> +
> + eb->len = len;
> + set_page_extent_head(eb->first_page, len);
> + }
> +
> + atomic_inc(&eb->refs);
> + spin_unlock(&tree->buffer_lock);
> + mark_page_accessed(eb->first_page);
> + return eb;
> + } else {
> + int ret;
> +
> + /*
> + * if eb->len != len, it means this extent buffer
> + * is reused as a new extent buffer.
> + */
> + BUG_ON(atomic_read(&eb->refs) != 1);
> +
> + i = num_extent_pages(eb->start, eb->len);
> + index += i;
> +
> + ret = btrfs_enlarge_extent_buffer(tree, eb, len);
> + if (ret) {
> + spin_unlock(&tree->buffer_lock);
> + return NULL;
> + } else {
> + rb_erase(&eb->rb_node, &tree->buffer);
> + spin_unlock(&tree->buffer_lock);
> + mark_page_accessed(eb->first_page);
> + goto eb_alloc_pages;
> + }
> + }
> }
> spin_unlock(&tree->buffer_lock);
>
> @@ -3170,6 +3248,8 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> } else {
> i = 0;
> }
> +
> +eb_alloc_pages:
> for (; i < num_pages; i++, index++) {
> p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
> if (!p) {
> @@ -3197,6 +3277,8 @@ struct extent_buffer *alloc_extent_buffer(struct
> extent_io_tree *tree,
> /* add one reference for the caller */
> atomic_inc(&exists->refs);
> spin_unlock(&tree->buffer_lock);
> +
> + BUG_ON(exists->len != eb->len);
> goto free_eb;
> }
> /* add one reference for the tree */


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2010-07-14  9:11 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-13  8:55 [PATCH 2/2] btrfs: fix oops when leafsize is greator than nodesize Miao Xie
2010-07-14  9:11 ` Miao Xie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).