All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
@ 2012-10-27 10:28 Liu Bo
  2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
                   ` (2 more replies)
  0 siblings, 3 replies; 20+ messages in thread
From: Liu Bo @ 2012-10-27 10:28 UTC (permalink / raw)
  To: linux-btrfs

This comes from one of btrfs's project ideas,
As we defragment files, we break any sharing from other snapshots.
The balancing code will preserve the sharing, and defrag needs to grow this
as well.

Now we're able to fill the blank with this patch, in which we make full use of
backref walking stuff.

Here is the basic idea,
o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
o  at endio, after we finish updating fs tree, we use backref walking to find
   all parents of the ranges and re-link them with the new COWed file layout by
   adding corresponding backrefs.

Originally patch by Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
v3->v4:
      - fix duplicated refs bugs detected by mounting with autodefrag, thanks
        for the bug report from Mitch and Chris.

 fs/btrfs/inode.c |  609 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 609 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 85a1e50..35e6993 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -54,6 +54,7 @@
 #include "locking.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "backref.h"
 
 struct btrfs_iget_args {
 	u64 ino;
@@ -1839,6 +1840,600 @@ out:
 	return ret;
 }
 
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+	struct rb_node node;
+	struct old_sa_defrag_extent *old;
+	u64 root_id;
+	u64 inum;
+	u64 file_pos;
+	u64 extent_offset;
+	u64 num_bytes;
+	u64 generation;
+};
+
+struct old_sa_defrag_extent {
+	struct list_head list;
+	struct new_sa_defrag_extent *new;
+
+	u64 extent_offset;
+	u64 bytenr;
+	u64 offset;
+	u64 len;
+	int count;
+};
+
+struct new_sa_defrag_extent {
+	struct rb_root root;
+	struct list_head head;
+	struct btrfs_path *path;
+	struct inode *inode;
+	u64 file_pos;
+	u64 len;
+	u64 bytenr;
+	u64 disk_len;
+	u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+			struct sa_defrag_extent_backref *b2)
+{
+	if (b1->root_id < b2->root_id)
+		return -1;
+	else if (b1->root_id > b2->root_id)
+		return 1;
+
+	if (b1->inum < b2->inum)
+		return -1;
+	else if (b1->inum > b2->inum)
+		return 1;
+
+	if (b1->file_pos < b2->file_pos)
+		return -1;
+	else if (b1->file_pos > b2->file_pos)
+		return 1;
+
+	return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+			   struct sa_defrag_extent_backref *backref)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct sa_defrag_extent_backref *entry;
+	int ret;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+		ret = backref_comp(backref, entry);
+		if (ret < 0)
+			p = &(*p)->rb_left;
+		else
+			/*
+			 * Since space can be shared, so there can be
+			 * some backrefs(extent tree to fs/file tree)
+			 * whoes fs/file extents map to the same address.
+			 * If so, we just put it after what we've found.
+			 */
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&backref->node, parent, p);
+	rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+				       void *ctx)
+{
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_fs_info *fs_info;
+	struct old_sa_defrag_extent *old = ctx;
+	struct new_sa_defrag_extent *new = old->new;
+	struct btrfs_path *path = new->path;
+	struct btrfs_key key;
+	struct btrfs_root *root;
+	struct sa_defrag_extent_backref *backref;
+	struct extent_buffer *leaf;
+	struct inode *inode = new->inode;
+	int slot;
+	int ret;
+	u64 extent_offset;
+	u64 num_bytes;
+
+	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+	    inum == btrfs_ino(inode))
+		return 0;
+
+	key.objectid = root_id;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	fs_info = BTRFS_I(inode)->root->fs_info;
+	root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(root)) {
+		if (PTR_ERR(root) == -ENOENT)
+			return 0;
+		WARN_ON(1);
+		pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+			 inum, offset, root_id);
+		return PTR_ERR(root);
+	}
+
+	key.objectid = inum;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	if (offset > (u64)-1 << 32)
+		key.offset = 0;
+	else
+		key.offset = offset;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		WARN_ON(1);
+		return ret;
+	}
+
+	while (1) {
+		cond_resched();
+
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				goto out;
+			} else if (ret > 0) {
+				ret = 0;
+				goto out;
+			}
+			continue;
+		}
+
+		path->slots[0]++;
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+
+		if (key.objectid > inum)
+			goto out;
+
+		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		extent = btrfs_item_ptr(leaf, slot,
+					struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+			continue;
+
+		extent_offset = btrfs_file_extent_offset(leaf, extent);
+		if (key.offset - extent_offset != offset)
+			continue;
+
+		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+		if (extent_offset >= old->extent_offset + old->offset +
+		    old->len || extent_offset + num_bytes <=
+		    old->extent_offset + old->offset)
+			continue;
+
+		break;
+	}
+
+	backref = kmalloc(sizeof(*backref), GFP_NOFS);
+	if (!backref) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	backref->root_id = root_id;
+	backref->inum = inum;
+	backref->file_pos = offset + extent_offset;
+	backref->num_bytes = num_bytes;
+	backref->extent_offset = extent_offset;
+	backref->generation = btrfs_file_extent_generation(leaf, extent);
+	backref->old = old;
+	backref_insert(&new->root, backref);
+	old->count++;
+out:
+	btrfs_release_path(path);
+	WARN_ON(ret);
+	return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+				   struct new_sa_defrag_extent *new)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+	struct old_sa_defrag_extent *old, *tmp;
+	int ret;
+
+	new->path = path;
+
+	list_for_each_entry_safe(old, tmp, &new->head, list) {
+		ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+						  path, record_one_backref,
+						  old);
+		BUG_ON(ret < 0 && ret != -ENOENT);
+
+		/* no backref to be processed for this extent */
+		if (!old->count) {
+			list_del(&old->list);
+			kfree(old);
+		}
+	}
+
+	if (list_empty(&new->head))
+		return false;
+
+	return true;
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+				 struct sa_defrag_extent_backref *prev,
+				 struct sa_defrag_extent_backref *backref)
+{
+	struct btrfs_file_extent_item *extent;
+	struct btrfs_file_extent_item *item;
+	struct btrfs_ordered_extent *ordered;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct old_sa_defrag_extent *old = backref->old;
+	struct new_sa_defrag_extent *new = old->new;
+	struct inode *src_inode = new->inode;
+	struct inode *inode;
+	struct extent_state *cached = NULL;
+	int ret = 0;
+	u64 start;
+	u64 len;
+	u64 lock_start;
+	u64 lock_end;
+	bool merge = false;
+
+	if (prev && prev->root_id == backref->root_id &&
+	    prev->inum == backref->inum &&
+	    prev->file_pos + prev->num_bytes == backref->file_pos)
+		merge = true;
+
+	key.objectid = backref->root_id;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	fs_info = BTRFS_I(src_inode)->root->fs_info;
+	root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(root)) {
+		if (PTR_ERR(root) == -ENOENT)
+			return 0;
+		return PTR_ERR(root);
+	}
+
+	key.objectid = backref->inum;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+	if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
+		if (inode && !IS_ERR(inode))
+			iput(inode);
+		return 0;
+	}
+
+	lock_start = backref->file_pos;
+	lock_end = backref->file_pos + backref->num_bytes - 1;
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+			 0, &cached);
+
+	ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
+	if (ordered) {
+		btrfs_put_ordered_extent(ordered);
+		goto out_unlock;
+	}
+
+	trans = btrfs_join_transaction(root);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out_unlock;
+	}
+
+	key.objectid = backref->inum;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = backref->file_pos;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		goto out_free_path;
+	} else if (ret > 0) {
+		ret = 0;
+		goto out_free_path;
+	}
+
+	extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				struct btrfs_file_extent_item);
+
+	if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+	    backref->generation)
+		goto out_free_path;
+
+	btrfs_release_path(path);
+
+	start = backref->file_pos;
+	if (backref->extent_offset < old->extent_offset + old->offset)
+		start += old->extent_offset + old->offset -
+			 backref->extent_offset;
+
+	len = min(backref->extent_offset + backref->num_bytes,
+		  old->extent_offset + old->offset + old->len);
+	len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+	ret = btrfs_drop_extents(trans, root, inode, start,
+				 start + len, 1);
+	if (ret)
+		goto out_free_path;
+again:
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = start;
+
+	if (merge) {
+		struct btrfs_file_extent_item *fi;
+		u64 extent_len;
+		struct btrfs_key found_key;
+
+		ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+		if (ret < 0)
+			goto out_free_path;
+
+		path->slots[0]--;
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+		if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
+		    btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
+		    !btrfs_file_extent_compression(leaf, fi) &&
+		    !btrfs_file_extent_encryption(leaf, fi) &&
+		    !btrfs_file_extent_other_encoding(leaf, fi) &&
+		    extent_len + found_key.offset == start) {
+			btrfs_set_file_extent_num_bytes(leaf, fi,
+							extent_len + len);
+			btrfs_mark_buffer_dirty(leaf);
+			inode_add_bytes(inode, len);
+
+			ret = 1;
+			goto out_free_path;
+		} else {
+			merge = false;
+			btrfs_release_path(path);
+			goto again;
+		}
+	}
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+					sizeof(*extent));
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto out_free_path;
+	}
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_file_extent_item);
+	btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+	btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+	btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+	btrfs_set_file_extent_num_bytes(leaf, item, len);
+	btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+	btrfs_set_file_extent_generation(leaf, item, trans->transid);
+	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+	btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+	btrfs_set_file_extent_encryption(leaf, item, 0);
+	btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+	btrfs_mark_buffer_dirty(leaf);
+	inode_add_bytes(inode, len);
+
+	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+			new->disk_len, 0,
+			backref->root_id, backref->inum,
+			new->file_pos, 0);	/* start - extent_offset */
+	if (ret) {
+		btrfs_abort_transaction(trans, root, ret);
+		goto out_free_path;
+	}
+
+	ret = 1;
+out_free_path:
+	btrfs_release_path(path);
+	btrfs_end_transaction(trans, root);
+out_unlock:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+			     &cached, GFP_NOFS);
+	iput(inode);
+	return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+	struct btrfs_path *path;
+	struct old_sa_defrag_extent *old, *tmp;
+	struct sa_defrag_extent_backref *backref;
+	struct sa_defrag_extent_backref *prev = NULL;
+	struct inode *inode;
+	struct btrfs_root *root;
+	struct rb_node *node;
+	int ret;
+
+	inode = new->inode;
+	root = BTRFS_I(inode)->root;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return;
+
+	if (!record_extent_backrefs(path, new)) {
+		btrfs_free_path(path);
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	while (1) {
+		node = rb_first(&new->root);
+		if (!node)
+			break;
+		rb_erase(node, &new->root);
+
+		backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+		ret = relink_extent_backref(path, prev, backref);
+		WARN_ON(ret < 0);
+
+		kfree(prev);
+
+		if (ret == 1)
+			prev = backref;
+		else
+			prev = NULL;
+		cond_resched();
+	}
+	kfree(prev);
+
+	btrfs_free_path(path);
+
+	list_for_each_entry_safe(old, tmp, &new->head, list) {
+		list_del(&old->list);
+		kfree(old);
+	}
+out:
+	atomic_dec(&root->fs_info->defrag_running);
+	wake_up(&root->fs_info->transaction_wait);
+
+	kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+			struct btrfs_ordered_extent *ordered)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct old_sa_defrag_extent *old, *tmp;
+	struct new_sa_defrag_extent *new;
+	int ret;
+
+	new = kmalloc(sizeof(*new), GFP_NOFS);
+	if (!new)
+		return NULL;
+
+	new->inode = inode;
+	new->file_pos = ordered->file_offset;
+	new->len = ordered->len;
+	new->bytenr = ordered->start;
+	new->disk_len = ordered->disk_len;
+	new->compress_type = ordered->compress_type;
+	new->root = RB_ROOT;
+	INIT_LIST_HEAD(&new->head);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out_kfree;
+
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = new->file_pos;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out_free_path;
+	if (ret > 0 && path->slots[0] > 0)
+		path->slots[0]--;
+
+	/* find out all the old extents for the file range */
+	while (1) {
+		struct btrfs_file_extent_item *extent;
+		struct extent_buffer *l;
+		int slot;
+		u64 num_bytes;
+		u64 offset;
+		u64 end;
+
+		l = path->nodes[0];
+		slot = path->slots[0];
+
+		if (slot >= btrfs_header_nritems(l)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto out_free_list;
+			else if (ret > 0)
+				break;
+			continue;
+		}
+
+		btrfs_item_key_to_cpu(l, &key, slot);
+
+		if (key.objectid != btrfs_ino(inode))
+			break;
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+		if (key.offset >= new->file_pos + new->len)
+			break;
+
+		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+		num_bytes = btrfs_file_extent_num_bytes(l, extent);
+		if (key.offset + num_bytes < new->file_pos)
+			goto next;
+
+		old = kmalloc(sizeof(*old), GFP_NOFS);
+		if (!old)
+			goto out_free_list;
+
+		offset = max(new->file_pos, key.offset);
+		end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+		old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+		BUG_ON(!old->bytenr);
+		old->extent_offset = btrfs_file_extent_offset(l, extent);
+		old->offset = offset - key.offset;
+		old->len = end - offset;
+		old->new = new;
+		old->count = 0;
+		list_add_tail(&old->list, &new->head);
+next:
+		path->slots[0]++;
+		cond_resched();
+	}
+
+	btrfs_free_path(path);
+	atomic_inc(&root->fs_info->defrag_running);
+
+	return new;
+
+out_free_list:
+	list_for_each_entry_safe(old, tmp, &new->head, list) {
+		list_del(&old->list);
+		kfree(old);
+	}
+out_free_path:
+	btrfs_free_path(path);
+out_kfree:
+	kfree(new);
+	return NULL;
+}
+
 /*
  * helper function for btrfs_finish_ordered_io, this
  * just reads in some of the csum leaves to prime them into ram
@@ -1856,6 +2451,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 	struct btrfs_trans_handle *trans = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
+	struct new_sa_defrag_extent *new = NULL;
 	int compress_type = 0;
 	int ret;
 	bool nolock;
@@ -1892,6 +2488,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			 ordered_extent->file_offset + ordered_extent->len - 1,
 			 0, &cached_state);
 
+	ret = test_range_bit(io_tree, ordered_extent->file_offset,
+			ordered_extent->file_offset + ordered_extent->len - 1,
+			EXTENT_DEFRAG, 1, cached_state);
+	if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+						BTRFS_I(inode)->generation) {
+		/* the inode is shared */
+		new = record_old_file_extents(inode, ordered_extent);
+	}
+
 	if (nolock)
 		trans = btrfs_join_transaction_nolock(root);
 	else
@@ -1965,6 +2570,10 @@ out:
 	 */
 	btrfs_remove_ordered_extent(inode, ordered_extent);
 
+	/* for snapshot-aware defrag */
+	if (new)
+		relink_file_extents(new);
+
 	/* once for us */
 	btrfs_put_ordered_extent(ordered_extent);
 	/* once for the tree */
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-27 10:28 [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Liu Bo
@ 2012-10-27 10:28 ` Liu Bo
  2012-10-30 23:31   ` David Sterba
  2012-11-01 14:43   ` Chris Mason
  2012-10-29 20:06 ` [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Mitch Harder
  2012-10-31 12:13 ` Itaru Kitayama
  2 siblings, 2 replies; 20+ messages in thread
From: Liu Bo @ 2012-10-27 10:28 UTC (permalink / raw)
  To: linux-btrfs

This feature works on our crucial write endio path, so if we've got
lots of fragments to process, it will be kind of a disaster to the
performance, so I make such a change.

One can benifit from it while mounting with '-o snap_aware_defrag'.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/ctree.h |    1 +
 fs/btrfs/inode.c |   16 ++++++++++------
 fs/btrfs/ioctl.c |    5 +++--
 fs/btrfs/super.c |   12 ++++++++++--
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 926c9ff..f9cd9c9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1756,6 +1756,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY	(1 << 20)
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
+#define BTRFS_MOUNT_SA_DEFRAG		(1 << 23)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 35e6993..069499e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2488,13 +2488,17 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			 ordered_extent->file_offset + ordered_extent->len - 1,
 			 0, &cached_state);
 
-	ret = test_range_bit(io_tree, ordered_extent->file_offset,
-			ordered_extent->file_offset + ordered_extent->len - 1,
-			EXTENT_DEFRAG, 1, cached_state);
-	if (ret && btrfs_root_last_snapshot(&root->root_item) >=
+	if (btrfs_test_opt(root, SA_DEFRAG)) {
+		ret = test_range_bit(io_tree, ordered_extent->file_offset,
+				     ordered_extent->file_offset +
+				     ordered_extent->len - 1,
+				     EXTENT_DEFRAG, 1, cached_state);
+		if (ret &&
+		    btrfs_root_last_snapshot(&root->root_item) >=
 						BTRFS_I(inode)->generation) {
-		/* the inode is shared */
-		new = record_old_file_extents(inode, ordered_extent);
+			/* the inode is shared */
+			new = record_old_file_extents(inode, ordered_extent);
+		}
 	}
 
 	if (nolock)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6116880..1367165 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1058,8 +1058,9 @@ again:
 	}
 
 
-	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
-			  &cached_state, GFP_NOFS);
+	if (btrfs_test_opt(BTRFS_I(inode)->root, SA_DEFRAG))
+		set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start,
+				  page_end - 1, &cached_state, GFP_NOFS);
 
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
 			     page_start, page_end - 1, &cached_state,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 915ac14..24eac5f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -308,8 +308,8 @@ enum {
 	Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
 	Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
 	Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-	Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
-	Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
+	Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_sa_defrag,
+	Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
 	Opt_check_integrity, Opt_check_integrity_including_extent_data,
 	Opt_check_integrity_print_mask, Opt_fatal_errors,
 	Opt_err,
@@ -344,6 +344,7 @@ static match_table_t tokens = {
 	{Opt_enospc_debug, "enospc_debug"},
 	{Opt_subvolrootid, "subvolrootid=%d"},
 	{Opt_defrag, "autodefrag"},
+	{Opt_sa_defrag, "snap_aware_defrag"},
 	{Opt_inode_cache, "inode_cache"},
 	{Opt_no_space_cache, "nospace_cache"},
 	{Opt_recovery, "recovery"},
@@ -564,6 +565,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: enabling auto defrag\n");
 			btrfs_set_opt(info->mount_opt, AUTO_DEFRAG);
 			break;
+		case Opt_sa_defrag:
+			printk(KERN_INFO "btrfs: enabling snapshot-aware"
+			       " defrag\n");
+			btrfs_set_opt(info->mount_opt, SA_DEFRAG);
+			break;
 		case Opt_recovery:
 			printk(KERN_INFO "btrfs: enabling auto recovery\n");
 			btrfs_set_opt(info->mount_opt, RECOVERY);
@@ -935,6 +941,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_puts(seq, ",enospc_debug");
 	if (btrfs_test_opt(root, AUTO_DEFRAG))
 		seq_puts(seq, ",autodefrag");
+	if (btrfs_test_opt(root, SA_DEFRAG))
+		seq_puts(seq, ",snap_aware_defrag");
 	if (btrfs_test_opt(root, INODE_MAP_CACHE))
 		seq_puts(seq, ",inode_cache");
 	if (btrfs_test_opt(root, SKIP_BALANCE))
-- 
1.7.7.6


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-27 10:28 [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Liu Bo
  2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
@ 2012-10-29 20:06 ` Mitch Harder
  2012-10-30  1:20   ` Liu Bo
  2012-10-31 12:13 ` Itaru Kitayama
  2 siblings, 1 reply; 20+ messages in thread
From: Mitch Harder @ 2012-10-29 20:06 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

On Sat, Oct 27, 2012 at 5:28 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> This comes from one of btrfs's project ideas,
> As we defragment files, we break any sharing from other snapshots.
> The balancing code will preserve the sharing, and defrag needs to grow this
> as well.
>
> Now we're able to fill the blank with this patch, in which we make full use of
> backref walking stuff.
>
> Here is the basic idea,
> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> o  at endio, after we finish updating fs tree, we use backref walking to find
>    all parents of the ranges and re-link them with the new COWed file layout by
>    adding corresponding backrefs.
>
> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> v3->v4:
>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>         for the bug report from Mitch and Chris.
>

I'm picking up many WARN_ON messages while testing this patch.

I'm testing a snapshot script that uses kernel git sources along with
some git manipulations.

The kernel is a 3.6.4 kernel merged with the latest for-linus branch.

I mounted with -o compress-force=lzo,autodefrag.

I also have the second patch in this set (Btrfs: make snapshot-aware
defrag as a mount option).  However, I did not mount with
'snap_aware_defrag'.

I did not find any corrupted data, and the partition passes a btrfsck
without error after these warnings were observed.

Here's a summary of the WARN_ON messages:

$ cat local/dmesg-3.6.4-x+ | grep "WARNING:"
[  610.407561] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  610.407757] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  610.407929] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  661.211849] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  661.212004] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  661.212236] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  719.882942] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  719.883112] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  719.883232] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  786.978869] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  786.979003] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  786.979140] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  845.605176] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  845.605323] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  845.605445] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  912.300307] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  912.300454] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  912.300577] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[  968.835873] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[  968.836032] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[  968.836156] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1023.778160] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1023.778316] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1023.778435] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1064.342768] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1064.342914] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1064.343112] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1177.892047] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1177.892189] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1177.892312] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1281.951715] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1281.951857] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1281.951978] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1282.804376] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1282.804524] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1282.804645] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1351.187114] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1351.187263] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1351.187391] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1399.022919] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1399.023086] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1399.023206] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1483.724756] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1483.724895] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1483.725031] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1523.081643] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1523.081792] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1523.081922] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1555.993160] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1555.993299] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1555.993420] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1556.252181] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1556.252312] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1556.252427] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1557.218561] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1557.218691] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1557.218805] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1557.895069] WARNING: at fs/btrfs/extent-tree.c:4417
btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
[ 1557.895261] WARNING: at fs/btrfs/extent-tree.c:4418
btrfs_free_block_groups+0x347/0x34c [btrfs]()
[ 1557.895435] WARNING: at fs/btrfs/extent-tree.c:7687
btrfs_free_block_groups+0x23e/0x34c [btrfs]()
[ 1557.895619] WARNING: at fs/btrfs/extent-tree.c:7687
btrfs_free_block_groups+0x23e/0x34c [btrfs]()

Here's a snip of the code that has the WARN_ONs in inode.c:

void btrfs_destroy_inode(struct inode *inode)
{
        struct btrfs_ordered_extent *ordered;
        struct btrfs_root *root = BTRFS_I(inode)->root;

        WARN_ON(!hlist_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
        WARN_ON(BTRFS_I(inode)->outstanding_extents);  // line 7779
        WARN_ON(BTRFS_I(inode)->reserved_extents);  // line 7780
        WARN_ON(BTRFS_I(inode)->delalloc_bytes); // line 7781
        WARN_ON(BTRFS_I(inode)->csum_bytes); // line 7782

Here's the dmesg output for just the last set of errors (the other
WARN_ONs had similar backtraces):

[ 1557.218529] ------------[ cut here ]------------
[ 1557.218561] WARNING: at fs/btrfs/inode.c:7779
btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
[ 1557.218564] Hardware name: OptiPlex 745
[ 1557.218566] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.218582] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.218584] Call Trace:
[ 1557.218592]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.218596]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.218612]  [<ffffffffa00414c9>] btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]
[ 1557.218618]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
[ 1557.218622]  [<ffffffff8113f045>] evict+0x122/0x1ac
[ 1557.218626]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
[ 1557.218629]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
[ 1557.218635]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
[ 1557.218639]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.218650]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.218654]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.218658]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.218663]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.218666]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.218672]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.218674] ---[ end trace 29a44c3c46b1a1b9 ]---
[ 1557.218676] ------------[ cut here ]------------
[ 1557.218691] WARNING: at fs/btrfs/inode.c:7780
btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
[ 1557.218693] Hardware name: OptiPlex 745
[ 1557.218694] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.218708] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.218709] Call Trace:
[ 1557.218714]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.218717]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.218733]  [<ffffffffa00414b3>] btrfs_destroy_inode+0x296/0x2e6 [btrfs]
[ 1557.218737]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
[ 1557.218741]  [<ffffffff8113f045>] evict+0x122/0x1ac
[ 1557.218744]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
[ 1557.218748]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
[ 1557.218752]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
[ 1557.218757]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.218767]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.218771]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.218775]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.218779]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.218783]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.218787]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.218789] ---[ end trace 29a44c3c46b1a1ba ]---
[ 1557.218791] ------------[ cut here ]------------
[ 1557.218805] WARNING: at fs/btrfs/inode.c:7782
btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
[ 1557.218807] Hardware name: OptiPlex 745
[ 1557.218809] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.218822] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.218823] Call Trace:
[ 1557.218827]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.218831]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.218847]  [<ffffffffa0041487>] btrfs_destroy_inode+0x26a/0x2e6 [btrfs]
[ 1557.218851]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
[ 1557.218854]  [<ffffffff8113f045>] evict+0x122/0x1ac
[ 1557.218858]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
[ 1557.218862]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
[ 1557.218866]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
[ 1557.218870]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.218881]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.218885]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.218889]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.218893]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.218897]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.218901]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.218903] ---[ end trace 29a44c3c46b1a1bb ]---
[ 1557.895037] ------------[ cut here ]------------
[ 1557.895069] WARNING: at fs/btrfs/extent-tree.c:4417
btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
[ 1557.895073] Hardware name: OptiPlex 745
[ 1557.895075] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.895097] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.895100] Call Trace:
[ 1557.895112]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.895118]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.895136]  [<ffffffffa0022769>] btrfs_free_block_groups+0x2ad/0x34c [btrfs]
[ 1557.895157]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
[ 1557.895165]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
[ 1557.895179]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
[ 1557.895187]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
[ 1557.895193]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.895208]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.895214]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.895220]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.895226]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.895231]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.895239]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.895243] ---[ end trace 29a44c3c46b1a1bc ]---
[ 1557.895245] ------------[ cut here ]------------
[ 1557.895261] WARNING: at fs/btrfs/extent-tree.c:4418
btrfs_free_block_groups+0x347/0x34c [btrfs]()
[ 1557.895264] Hardware name: OptiPlex 745
[ 1557.895266] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.895285] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.895287] Call Trace:
[ 1557.895294]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.895299]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.895317]  [<ffffffffa0022803>] btrfs_free_block_groups+0x347/0x34c [btrfs]
[ 1557.895337]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
[ 1557.895343]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
[ 1557.895358]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
[ 1557.895364]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
[ 1557.895370]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.895385]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.895391]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.895397]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.895402]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.895407]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.895413]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.895417] ---[ end trace 29a44c3c46b1a1bd ]---
[ 1557.895419] ------------[ cut here ]------------
[ 1557.895435] WARNING: at fs/btrfs/extent-tree.c:7687
btrfs_free_block_groups+0x23e/0x34c [btrfs]()
[ 1557.895438] Hardware name: OptiPlex 745
[ 1557.895440] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.895458] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.895461] Call Trace:
[ 1557.895467]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.895472]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.895489]  [<ffffffffa00226fa>] btrfs_free_block_groups+0x23e/0x34c [btrfs]
[ 1557.895510]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
[ 1557.895516]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
[ 1557.895533]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
[ 1557.895540]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
[ 1557.895547]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.895561]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.895567]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.895573]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.895578]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.895583]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.895588]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.895591] ---[ end trace 29a44c3c46b1a1be ]---
[ 1557.895595] space_info 1 has 806678528 free, is not full
[ 1557.895600] space_info total=4303355904, used=3496611840, pinned=0,
reserved=0, may_use=2998272, readonly=65536
[ 1557.895603] ------------[ cut here ]------------
[ 1557.895619] WARNING: at fs/btrfs/extent-tree.c:7687
btrfs_free_block_groups+0x23e/0x34c [btrfs]()
[ 1557.895622] Hardware name: OptiPlex 745
[ 1557.895624] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[ 1557.895643] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
[ 1557.895646] Call Trace:
[ 1557.895652]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
[ 1557.895656]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
[ 1557.895673]  [<ffffffffa00226fa>] btrfs_free_block_groups+0x23e/0x34c [btrfs]
[ 1557.895693]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
[ 1557.895699]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
[ 1557.895716]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
[ 1557.895723]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
[ 1557.895729]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
[ 1557.895743]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
[ 1557.895749]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
[ 1557.895754]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
[ 1557.895760]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
[ 1557.895764]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
[ 1557.895770]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
[ 1557.895773] ---[ end trace 29a44c3c46b1a1bf ]---
[ 1557.895776] space_info 4 has 92979200 free, is not full
[ 1557.895780] space_info total=813694976, used=720715776, pinned=0,
reserved=0, may_use=26738688, readonly=0

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-29 20:06 ` [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Mitch Harder
@ 2012-10-30  1:20   ` Liu Bo
  2012-10-30 20:59     ` Mitch Harder
  0 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2012-10-30  1:20 UTC (permalink / raw)
  To: Mitch Harder; +Cc: linux-btrfs

On 10/30/2012 04:06 AM, Mitch Harder wrote:
> On Sat, Oct 27, 2012 at 5:28 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>> This comes from one of btrfs's project ideas,
>> As we defragment files, we break any sharing from other snapshots.
>> The balancing code will preserve the sharing, and defrag needs to grow this
>> as well.
>>
>> Now we're able to fill the blank with this patch, in which we make full use of
>> backref walking stuff.
>>
>> Here is the basic idea,
>> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>> o  at endio, after we finish updating fs tree, we use backref walking to find
>>    all parents of the ranges and re-link them with the new COWed file layout by
>>    adding corresponding backrefs.
>>
>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>> ---
>> v3->v4:
>>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>>         for the bug report from Mitch and Chris.
>>
> 
> I'm picking up many WARN_ON messages while testing this patch.
> 
> I'm testing a snapshot script that uses kernel git sources along with
> some git manipulations.
> 
> The kernel is a 3.6.4 kernel merged with the latest for-linus branch.
> 
> I mounted with -o compress-force=lzo,autodefrag.
> 
> I also have the second patch in this set (Btrfs: make snapshot-aware
> defrag as a mount option).  However, I did not mount with
> 'snap_aware_defrag'.
> 
> I did not find any corrupted data, and the partition passes a btrfsck
> without error after these warnings were observed.
> 

Hi Mitch,

Well, good report, but I don't think it has anything to do with this patch(since you
didn't mount with 'snap_aware_defrag' :)

After going through the below messages, the bug comes from the space side where we
must have mis-used our reservation somehow.

So can you show me your script so that I can give it a shot to reproduce locally?

thanks,
liubo


> Here's a summary of the WARN_ON messages:
> 
> $ cat local/dmesg-3.6.4-x+ | grep "WARNING:"
> [  610.407561] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  610.407757] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  610.407929] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  661.211849] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  661.212004] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  661.212236] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  719.882942] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  719.883112] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  719.883232] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  786.978869] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  786.979003] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  786.979140] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  845.605176] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  845.605323] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  845.605445] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  912.300307] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  912.300454] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  912.300577] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [  968.835873] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [  968.836032] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [  968.836156] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1023.778160] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1023.778316] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1023.778435] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1064.342768] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1064.342914] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1064.343112] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1177.892047] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1177.892189] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1177.892312] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1281.951715] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1281.951857] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1281.951978] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1282.804376] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1282.804524] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1282.804645] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1351.187114] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1351.187263] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1351.187391] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1399.022919] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1399.023086] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1399.023206] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1483.724756] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1483.724895] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1483.725031] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1523.081643] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1523.081792] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1523.081922] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1555.993160] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1555.993299] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1555.993420] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1556.252181] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1556.252312] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1556.252427] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1557.218561] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1557.218691] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1557.218805] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1557.895069] WARNING: at fs/btrfs/extent-tree.c:4417
> btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
> [ 1557.895261] WARNING: at fs/btrfs/extent-tree.c:4418
> btrfs_free_block_groups+0x347/0x34c [btrfs]()
> [ 1557.895435] WARNING: at fs/btrfs/extent-tree.c:7687
> btrfs_free_block_groups+0x23e/0x34c [btrfs]()
> [ 1557.895619] WARNING: at fs/btrfs/extent-tree.c:7687
> btrfs_free_block_groups+0x23e/0x34c [btrfs]()
> 
> Here's a snip of the code that has the WARN_ONs in inode.c:
> 
> void btrfs_destroy_inode(struct inode *inode)
> {
>         struct btrfs_ordered_extent *ordered;
>         struct btrfs_root *root = BTRFS_I(inode)->root;
> 
>         WARN_ON(!hlist_empty(&inode->i_dentry));
>         WARN_ON(inode->i_data.nrpages);
>         WARN_ON(BTRFS_I(inode)->outstanding_extents);  // line 7779
>         WARN_ON(BTRFS_I(inode)->reserved_extents);  // line 7780
>         WARN_ON(BTRFS_I(inode)->delalloc_bytes); // line 7781
>         WARN_ON(BTRFS_I(inode)->csum_bytes); // line 7782
> 
> Here's the dmesg output for just the last set of errors (the other
> WARN_ONs had similar backtraces):
> 
> [ 1557.218529] ------------[ cut here ]------------
> [ 1557.218561] WARNING: at fs/btrfs/inode.c:7779
> btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]()
> [ 1557.218564] Hardware name: OptiPlex 745
> [ 1557.218566] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.218582] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.218584] Call Trace:
> [ 1557.218592]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.218596]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.218612]  [<ffffffffa00414c9>] btrfs_destroy_inode+0x2ac/0x2e6 [btrfs]
> [ 1557.218618]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
> [ 1557.218622]  [<ffffffff8113f045>] evict+0x122/0x1ac
> [ 1557.218626]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
> [ 1557.218629]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
> [ 1557.218635]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
> [ 1557.218639]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.218650]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.218654]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.218658]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.218663]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.218666]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.218672]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.218674] ---[ end trace 29a44c3c46b1a1b9 ]---
> [ 1557.218676] ------------[ cut here ]------------
> [ 1557.218691] WARNING: at fs/btrfs/inode.c:7780
> btrfs_destroy_inode+0x296/0x2e6 [btrfs]()
> [ 1557.218693] Hardware name: OptiPlex 745
> [ 1557.218694] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.218708] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.218709] Call Trace:
> [ 1557.218714]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.218717]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.218733]  [<ffffffffa00414b3>] btrfs_destroy_inode+0x296/0x2e6 [btrfs]
> [ 1557.218737]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
> [ 1557.218741]  [<ffffffff8113f045>] evict+0x122/0x1ac
> [ 1557.218744]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
> [ 1557.218748]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
> [ 1557.218752]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
> [ 1557.218757]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.218767]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.218771]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.218775]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.218779]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.218783]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.218787]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.218789] ---[ end trace 29a44c3c46b1a1ba ]---
> [ 1557.218791] ------------[ cut here ]------------
> [ 1557.218805] WARNING: at fs/btrfs/inode.c:7782
> btrfs_destroy_inode+0x26a/0x2e6 [btrfs]()
> [ 1557.218807] Hardware name: OptiPlex 745
> [ 1557.218809] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.218822] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.218823] Call Trace:
> [ 1557.218827]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.218831]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.218847]  [<ffffffffa0041487>] btrfs_destroy_inode+0x26a/0x2e6 [btrfs]
> [ 1557.218851]  [<ffffffff8113ef00>] destroy_inode+0x3c/0x5f
> [ 1557.218854]  [<ffffffff8113f045>] evict+0x122/0x1ac
> [ 1557.218858]  [<ffffffff8113f619>] dispose_list+0x3a/0x49
> [ 1557.218862]  [<ffffffff8113fd27>] evict_inodes+0x106/0x115
> [ 1557.218866]  [<ffffffff81127c32>] generic_shutdown_super+0x4c/0xdc
> [ 1557.218870]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.218881]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.218885]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.218889]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.218893]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.218897]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.218901]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.218903] ---[ end trace 29a44c3c46b1a1bb ]---
> [ 1557.895037] ------------[ cut here ]------------
> [ 1557.895069] WARNING: at fs/btrfs/extent-tree.c:4417
> btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
> [ 1557.895073] Hardware name: OptiPlex 745
> [ 1557.895075] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.895097] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.895100] Call Trace:
> [ 1557.895112]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.895118]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.895136]  [<ffffffffa0022769>] btrfs_free_block_groups+0x2ad/0x34c [btrfs]
> [ 1557.895157]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
> [ 1557.895165]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
> [ 1557.895179]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
> [ 1557.895187]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
> [ 1557.895193]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.895208]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.895214]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.895220]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.895226]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.895231]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.895239]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.895243] ---[ end trace 29a44c3c46b1a1bc ]---
> [ 1557.895245] ------------[ cut here ]------------
> [ 1557.895261] WARNING: at fs/btrfs/extent-tree.c:4418
> btrfs_free_block_groups+0x347/0x34c [btrfs]()
> [ 1557.895264] Hardware name: OptiPlex 745
> [ 1557.895266] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.895285] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.895287] Call Trace:
> [ 1557.895294]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.895299]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.895317]  [<ffffffffa0022803>] btrfs_free_block_groups+0x347/0x34c [btrfs]
> [ 1557.895337]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
> [ 1557.895343]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
> [ 1557.895358]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
> [ 1557.895364]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
> [ 1557.895370]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.895385]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.895391]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.895397]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.895402]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.895407]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.895413]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.895417] ---[ end trace 29a44c3c46b1a1bd ]---
> [ 1557.895419] ------------[ cut here ]------------
> [ 1557.895435] WARNING: at fs/btrfs/extent-tree.c:7687
> btrfs_free_block_groups+0x23e/0x34c [btrfs]()
> [ 1557.895438] Hardware name: OptiPlex 745
> [ 1557.895440] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.895458] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.895461] Call Trace:
> [ 1557.895467]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.895472]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.895489]  [<ffffffffa00226fa>] btrfs_free_block_groups+0x23e/0x34c [btrfs]
> [ 1557.895510]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
> [ 1557.895516]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
> [ 1557.895533]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
> [ 1557.895540]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
> [ 1557.895547]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.895561]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.895567]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.895573]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.895578]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.895583]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.895588]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.895591] ---[ end trace 29a44c3c46b1a1be ]---
> [ 1557.895595] space_info 1 has 806678528 free, is not full
> [ 1557.895600] space_info total=4303355904, used=3496611840, pinned=0,
> reserved=0, may_use=2998272, readonly=65536
> [ 1557.895603] ------------[ cut here ]------------
> [ 1557.895619] WARNING: at fs/btrfs/extent-tree.c:7687
> btrfs_free_block_groups+0x23e/0x34c [btrfs]()
> [ 1557.895622] Hardware name: OptiPlex 745
> [ 1557.895624] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [ 1557.895643] Pid: 5728, comm: umount Tainted: G        W    3.6.4-x+ #3
> [ 1557.895646] Call Trace:
> [ 1557.895652]  [<ffffffff810350ea>] warn_slowpath_common+0x74/0xa2
> [ 1557.895656]  [<ffffffff81035132>] warn_slowpath_null+0x1a/0x1c
> [ 1557.895673]  [<ffffffffa00226fa>] btrfs_free_block_groups+0x23e/0x34c [btrfs]
> [ 1557.895693]  [<ffffffffa002e953>] close_ctree+0x1bc/0x2f1 [btrfs]
> [ 1557.895699]  [<ffffffff8113fd27>] ? evict_inodes+0x106/0x115
> [ 1557.895716]  [<ffffffffa0007075>] btrfs_put_super+0x19/0x1b [btrfs]
> [ 1557.895723]  [<ffffffff81127c41>] generic_shutdown_super+0x5b/0xdc
> [ 1557.895729]  [<ffffffff81127d44>] kill_anon_super+0x16/0x24
> [ 1557.895743]  [<ffffffffa000ad48>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [ 1557.895749]  [<ffffffff81127f68>] deactivate_locked_super+0x34/0x5a
> [ 1557.895754]  [<ffffffff81128b60>] deactivate_super+0x4e/0x66
> [ 1557.895760]  [<ffffffff81142924>] mntput_no_expire+0xf7/0x14d
> [ 1557.895764]  [<ffffffff811437ad>] sys_umount+0x63/0x37a
> [ 1557.895770]  [<ffffffff81824c12>] system_call_fastpath+0x16/0x1b
> [ 1557.895773] ---[ end trace 29a44c3c46b1a1bf ]---
> [ 1557.895776] space_info 4 has 92979200 free, is not full
> [ 1557.895780] space_info total=813694976, used=720715776, pinned=0,
> reserved=0, may_use=26738688, readonly=0
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-30  1:20   ` Liu Bo
@ 2012-10-30 20:59     ` Mitch Harder
  0 siblings, 0 replies; 20+ messages in thread
From: Mitch Harder @ 2012-10-30 20:59 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

On Mon, Oct 29, 2012 at 8:20 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> On 10/30/2012 04:06 AM, Mitch Harder wrote:
>> On Sat, Oct 27, 2012 at 5:28 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
>>> This comes from one of btrfs's project ideas,
>>> As we defragment files, we break any sharing from other snapshots.
>>> The balancing code will preserve the sharing, and defrag needs to grow this
>>> as well.
>>>
>>> Now we're able to fill the blank with this patch, in which we make full use of
>>> backref walking stuff.
>>>
>>> Here is the basic idea,
>>> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>>> o  at endio, after we finish updating fs tree, we use backref walking to find
>>>    all parents of the ranges and re-link them with the new COWed file layout by
>>>    adding corresponding backrefs.
>>>
>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>>> ---
>>> v3->v4:
>>>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>>>         for the bug report from Mitch and Chris.
>>>
>>
>> I'm picking up many WARN_ON messages while testing this patch.
>>
>> I'm testing a snapshot script that uses kernel git sources along with
>> some git manipulations.
>>
>> The kernel is a 3.6.4 kernel merged with the latest for-linus branch.
>>
>> I mounted with -o compress-force=lzo,autodefrag.
>>
>> I also have the second patch in this set (Btrfs: make snapshot-aware
>> defrag as a mount option).  However, I did not mount with
>> 'snap_aware_defrag'.
>>
>> I did not find any corrupted data, and the partition passes a btrfsck
>> without error after these warnings were observed.
>>
>
> Hi Mitch,
>
> Well, good report, but I don't think it has anything to do with this patch(since you
> didn't mount with 'snap_aware_defrag' :)
>

I've re-run my my testing script with a combination of no compression
and lzo compression, combined with no further options, only -o
autodefrag, and -o autodefrag,snap_aware_defrag.

I only get the WARN_ONs when I run with autodefrag only (no snap_aware_defrag).

My logs are clean when I avoid all defrag options, or use both
autodefrag and snap_aware_defrag.

> After going through the below messages, the bug comes from the space side where we
> must have mis-used our reservation somehow.
>
> So can you show me your script so that I can give it a shot to reproduce locally?
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
@ 2012-10-30 23:31   ` David Sterba
  2012-10-31  0:34     ` Liu Bo
  2012-11-01 14:43   ` Chris Mason
  1 sibling, 1 reply; 20+ messages in thread
From: David Sterba @ 2012-10-30 23:31 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

On Sat, Oct 27, 2012 at 06:28:41PM +0800, Liu Bo wrote:
> This feature works on our crucial write endio path, so if we've got
> lots of fragments to process, it will be kind of a disaster to the
> performance, so I make such a change.
> 
> One can benifit from it while mounting with '-o snap_aware_defrag'.

I vote for a more fine grained control over this feature, ie.  via
'btrfs fi defrag', off by default (current behaviour). The defrag ioctl
is the only place that actually calls set_extent_defrag, so this will
not affect normal operation and is fully in hands of the user who runs
defrag.

Do you have a usecase for setting it through the mount option?

thanks,
david

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-30 23:31   ` David Sterba
@ 2012-10-31  0:34     ` Liu Bo
  2012-10-31  0:44       ` David Sterba
  0 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2012-10-31  0:34 UTC (permalink / raw)
  To: David Sterba; +Cc: linux-btrfs

On 10/31/2012 07:31 AM, David Sterba wrote:
> On Sat, Oct 27, 2012 at 06:28:41PM +0800, Liu Bo wrote:
>> This feature works on our crucial write endio path, so if we've got
>> lots of fragments to process, it will be kind of a disaster to the
>> performance, so I make such a change.
>>
>> One can benifit from it while mounting with '-o snap_aware_defrag'.
> 
> I vote for a more fine grained control over this feature, ie.  via
> 'btrfs fi defrag', off by default (current behaviour). The defrag ioctl
> is the only place that actually calls set_extent_defrag, so this will
> not affect normal operation and is fully in hands of the user who runs
> defrag.
> 

Besides 'btrfs fi defrag', mounting with autodefrag may also do the same thing.

But controlling by 'btrfs fi defrag' can actually be a good idea.

thanks,
liubo

> Do you have a usecase for setting it through the mount option?
> 
> thanks,
> david
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-31  0:34     ` Liu Bo
@ 2012-10-31  0:44       ` David Sterba
  2012-10-31 13:31         ` Liu Bo
  0 siblings, 1 reply; 20+ messages in thread
From: David Sterba @ 2012-10-31  0:44 UTC (permalink / raw)
  To: Liu Bo; +Cc: David Sterba, linux-btrfs

On Wed, Oct 31, 2012 at 08:34:38AM +0800, Liu Bo wrote:
> Besides 'btrfs fi defrag', mounting with autodefrag may also do the same thing.

Ok, autodefrag, good point. Then I suggest to make the snapshot-aware a
mode of autodefrag, not a separate option (because it would make no
sense other than an alias for "autodefrag=snapshotaware")


david

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-27 10:28 [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Liu Bo
  2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
  2012-10-29 20:06 ` [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Mitch Harder
@ 2012-10-31 12:13 ` Itaru Kitayama
  2012-10-31 12:55   ` Liu Bo
  2 siblings, 1 reply; 20+ messages in thread
From: Itaru Kitayama @ 2012-10-31 12:13 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

Hi LiuBo:

I am seeing another warning with your patch applied btrfs-next.

[ 5224.531560] ------------[ cut here ]------------
[ 5224.531565] WARNING: at fs/btrfs/inode.c:2054
record_extent_backrefs+0x87/0xe0()
[ 5224.531567] Hardware name: Bochs
[ 5224.531568] Modules linked in: microcode ppdev psmouse nfsd nfs_acl
auth_rpcgss serio_raw nfs fscache lockd binfmt_misc sunrpc cirrus
parport_pc ttm drm_kms_helper drm sysimgblt i2c_piix4 sysfillrect
syscopyarea i2c_core lp parport floppy
[ 5224.531591] Pid: 2485, comm: btrfs-endio-wri Tainted: G        W
3.7.0-rc1-v11+ #53
[ 5224.531592] Call Trace:
[ 5224.531598]  [<ffffffff81061c63>] warn_slowpath_common+0x93/0xc0
[ 5224.531600]  [<ffffffff81061caa>] warn_slowpath_null+0x1a/0x20
[ 5224.531603]  [<ffffffff81322287>] record_extent_backrefs+0x87/0xe0
[ 5224.531606]  [<ffffffff8132d10b>] btrfs_finish_ordered_io+0x8bb/0xa80
[ 5224.531611]  [<ffffffff810ce300>] ? trace_hardirqs_off_caller+0xb0/0x140
[ 5224.531614]  [<ffffffff8132d2e5>] finish_ordered_fn+0x15/0x20
[ 5224.531617]  [<ffffffff8134beb7>] worker_loop+0x157/0x580
[ 5224.531620]  [<ffffffff8134bd60>] ? btrfs_queue_worker+0x2f0/0x2f0
[ 5224.531624]  [<ffffffff81090aa8>] kthread+0xe8/0xf0
[ 5224.531627]  [<ffffffff810ce3c2>] ? get_lock_stats+0x22/0x70
[ 5224.531630]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
[ 5224.531634]  [<ffffffff817c1c6c>] ret_from_fork+0x7c/0xb0
[ 5224.531636]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
[ 5224.531638] ---[ end trace 0256d2b5a195208c ]---

I've compared some of the old extents logical addresses with the corresponding
object ids and offsets from the extent tree; some are just 8k off from
the found extents
and some keys are totally off.

Itaru

On Sat, Oct 27, 2012 at 7:28 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> This comes from one of btrfs's project ideas,
> As we defragment files, we break any sharing from other snapshots.
> The balancing code will preserve the sharing, and defrag needs to grow this
> as well.
>
> Now we're able to fill the blank with this patch, in which we make full use of
> backref walking stuff.
>
> Here is the basic idea,
> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> o  at endio, after we finish updating fs tree, we use backref walking to find
>    all parents of the ranges and re-link them with the new COWed file layout by
>    adding corresponding backrefs.
>
> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> ---
> v3->v4:
>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>         for the bug report from Mitch and Chris.
>
>  fs/btrfs/inode.c |  609 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 609 insertions(+), 0 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 85a1e50..35e6993 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -54,6 +54,7 @@
>  #include "locking.h"
>  #include "free-space-cache.h"
>  #include "inode-map.h"
> +#include "backref.h"
>
>  struct btrfs_iget_args {
>         u64 ino;
> @@ -1839,6 +1840,600 @@ out:
>         return ret;
>  }
>
> +/* snapshot-aware defrag */
> +struct sa_defrag_extent_backref {
> +       struct rb_node node;
> +       struct old_sa_defrag_extent *old;
> +       u64 root_id;
> +       u64 inum;
> +       u64 file_pos;
> +       u64 extent_offset;
> +       u64 num_bytes;
> +       u64 generation;
> +};
> +
> +struct old_sa_defrag_extent {
> +       struct list_head list;
> +       struct new_sa_defrag_extent *new;
> +
> +       u64 extent_offset;
> +       u64 bytenr;
> +       u64 offset;
> +       u64 len;
> +       int count;
> +};
> +
> +struct new_sa_defrag_extent {
> +       struct rb_root root;
> +       struct list_head head;
> +       struct btrfs_path *path;
> +       struct inode *inode;
> +       u64 file_pos;
> +       u64 len;
> +       u64 bytenr;
> +       u64 disk_len;
> +       u8 compress_type;
> +};
> +
> +static int backref_comp(struct sa_defrag_extent_backref *b1,
> +                       struct sa_defrag_extent_backref *b2)
> +{
> +       if (b1->root_id < b2->root_id)
> +               return -1;
> +       else if (b1->root_id > b2->root_id)
> +               return 1;
> +
> +       if (b1->inum < b2->inum)
> +               return -1;
> +       else if (b1->inum > b2->inum)
> +               return 1;
> +
> +       if (b1->file_pos < b2->file_pos)
> +               return -1;
> +       else if (b1->file_pos > b2->file_pos)
> +               return 1;
> +
> +       return 0;
> +}
> +
> +static void backref_insert(struct rb_root *root,
> +                          struct sa_defrag_extent_backref *backref)
> +{
> +       struct rb_node **p = &root->rb_node;
> +       struct rb_node *parent = NULL;
> +       struct sa_defrag_extent_backref *entry;
> +       int ret;
> +
> +       while (*p) {
> +               parent = *p;
> +               entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
> +
> +               ret = backref_comp(backref, entry);
> +               if (ret < 0)
> +                       p = &(*p)->rb_left;
> +               else
> +                       /*
> +                        * Since space can be shared, so there can be
> +                        * some backrefs(extent tree to fs/file tree)
> +                        * whoes fs/file extents map to the same address.
> +                        * If so, we just put it after what we've found.
> +                        */
> +                       p = &(*p)->rb_right;
> +       }
> +
> +       rb_link_node(&backref->node, parent, p);
> +       rb_insert_color(&backref->node, root);
> +}
> +
> +/*
> + * Note the backref might has changed, and in this case we just return 0.
> + */
> +static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
> +                                      void *ctx)
> +{
> +       struct btrfs_file_extent_item *extent;
> +       struct btrfs_fs_info *fs_info;
> +       struct old_sa_defrag_extent *old = ctx;
> +       struct new_sa_defrag_extent *new = old->new;
> +       struct btrfs_path *path = new->path;
> +       struct btrfs_key key;
> +       struct btrfs_root *root;
> +       struct sa_defrag_extent_backref *backref;
> +       struct extent_buffer *leaf;
> +       struct inode *inode = new->inode;
> +       int slot;
> +       int ret;
> +       u64 extent_offset;
> +       u64 num_bytes;
> +
> +       if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
> +           inum == btrfs_ino(inode))
> +               return 0;
> +
> +       key.objectid = root_id;
> +       key.type = BTRFS_ROOT_ITEM_KEY;
> +       key.offset = (u64)-1;
> +
> +       fs_info = BTRFS_I(inode)->root->fs_info;
> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
> +       if (IS_ERR(root)) {
> +               if (PTR_ERR(root) == -ENOENT)
> +                       return 0;
> +               WARN_ON(1);
> +               pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
> +                        inum, offset, root_id);
> +               return PTR_ERR(root);
> +       }
> +
> +       key.objectid = inum;
> +       key.type = BTRFS_EXTENT_DATA_KEY;
> +       if (offset > (u64)-1 << 32)
> +               key.offset = 0;
> +       else
> +               key.offset = offset;
> +
> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +       if (ret < 0) {
> +               WARN_ON(1);
> +               return ret;
> +       }
> +
> +       while (1) {
> +               cond_resched();
> +
> +               leaf = path->nodes[0];
> +               slot = path->slots[0];
> +
> +               if (slot >= btrfs_header_nritems(leaf)) {
> +                       ret = btrfs_next_leaf(root, path);
> +                       if (ret < 0) {
> +                               goto out;
> +                       } else if (ret > 0) {
> +                               ret = 0;
> +                               goto out;
> +                       }
> +                       continue;
> +               }
> +
> +               path->slots[0]++;
> +
> +               btrfs_item_key_to_cpu(leaf, &key, slot);
> +
> +               if (key.objectid > inum)
> +                       goto out;
> +
> +               if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
> +                       continue;
> +
> +               extent = btrfs_item_ptr(leaf, slot,
> +                                       struct btrfs_file_extent_item);
> +
> +               if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
> +                       continue;
> +
> +               extent_offset = btrfs_file_extent_offset(leaf, extent);
> +               if (key.offset - extent_offset != offset)
> +                       continue;
> +
> +               num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
> +               if (extent_offset >= old->extent_offset + old->offset +
> +                   old->len || extent_offset + num_bytes <=
> +                   old->extent_offset + old->offset)
> +                       continue;
> +
> +               break;
> +       }
> +
> +       backref = kmalloc(sizeof(*backref), GFP_NOFS);
> +       if (!backref) {
> +               ret = -ENOENT;
> +               goto out;
> +       }
> +
> +       backref->root_id = root_id;
> +       backref->inum = inum;
> +       backref->file_pos = offset + extent_offset;
> +       backref->num_bytes = num_bytes;
> +       backref->extent_offset = extent_offset;
> +       backref->generation = btrfs_file_extent_generation(leaf, extent);
> +       backref->old = old;
> +       backref_insert(&new->root, backref);
> +       old->count++;
> +out:
> +       btrfs_release_path(path);
> +       WARN_ON(ret);
> +       return ret;
> +}
> +
> +static noinline bool record_extent_backrefs(struct btrfs_path *path,
> +                                  struct new_sa_defrag_extent *new)
> +{
> +       struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
> +       struct old_sa_defrag_extent *old, *tmp;
> +       int ret;
> +
> +       new->path = path;
> +
> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> +               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
> +                                                 path, record_one_backref,
> +                                                 old);
> +               BUG_ON(ret < 0 && ret != -ENOENT);
> +
> +               /* no backref to be processed for this extent */
> +               if (!old->count) {
> +                       list_del(&old->list);
> +                       kfree(old);
> +               }
> +       }
> +
> +       if (list_empty(&new->head))
> +               return false;
> +
> +       return true;
> +}
> +
> +/*
> + * Note the backref might has changed, and in this case we just return 0.
> + */
> +static noinline int relink_extent_backref(struct btrfs_path *path,
> +                                struct sa_defrag_extent_backref *prev,
> +                                struct sa_defrag_extent_backref *backref)
> +{
> +       struct btrfs_file_extent_item *extent;
> +       struct btrfs_file_extent_item *item;
> +       struct btrfs_ordered_extent *ordered;
> +       struct btrfs_trans_handle *trans;
> +       struct btrfs_fs_info *fs_info;
> +       struct btrfs_root *root;
> +       struct btrfs_key key;
> +       struct extent_buffer *leaf;
> +       struct old_sa_defrag_extent *old = backref->old;
> +       struct new_sa_defrag_extent *new = old->new;
> +       struct inode *src_inode = new->inode;
> +       struct inode *inode;
> +       struct extent_state *cached = NULL;
> +       int ret = 0;
> +       u64 start;
> +       u64 len;
> +       u64 lock_start;
> +       u64 lock_end;
> +       bool merge = false;
> +
> +       if (prev && prev->root_id == backref->root_id &&
> +           prev->inum == backref->inum &&
> +           prev->file_pos + prev->num_bytes == backref->file_pos)
> +               merge = true;
> +
> +       key.objectid = backref->root_id;
> +       key.type = BTRFS_ROOT_ITEM_KEY;
> +       key.offset = (u64)-1;
> +
> +       fs_info = BTRFS_I(src_inode)->root->fs_info;
> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
> +       if (IS_ERR(root)) {
> +               if (PTR_ERR(root) == -ENOENT)
> +                       return 0;
> +               return PTR_ERR(root);
> +       }
> +
> +       key.objectid = backref->inum;
> +       key.type = BTRFS_INODE_ITEM_KEY;
> +       key.offset = 0;
> +
> +       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
> +       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
> +               if (inode && !IS_ERR(inode))
> +                       iput(inode);
> +               return 0;
> +       }
> +
> +       lock_start = backref->file_pos;
> +       lock_end = backref->file_pos + backref->num_bytes - 1;
> +       lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
> +                        0, &cached);
> +
> +       ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
> +       if (ordered) {
> +               btrfs_put_ordered_extent(ordered);
> +               goto out_unlock;
> +       }
> +
> +       trans = btrfs_join_transaction(root);
> +       if (IS_ERR(trans)) {
> +               ret = PTR_ERR(trans);
> +               goto out_unlock;
> +       }
> +
> +       key.objectid = backref->inum;
> +       key.type = BTRFS_EXTENT_DATA_KEY;
> +       key.offset = backref->file_pos;
> +
> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +       if (ret < 0) {
> +               goto out_free_path;
> +       } else if (ret > 0) {
> +               ret = 0;
> +               goto out_free_path;
> +       }
> +
> +       extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
> +                               struct btrfs_file_extent_item);
> +
> +       if (btrfs_file_extent_generation(path->nodes[0], extent) !=
> +           backref->generation)
> +               goto out_free_path;
> +
> +       btrfs_release_path(path);
> +
> +       start = backref->file_pos;
> +       if (backref->extent_offset < old->extent_offset + old->offset)
> +               start += old->extent_offset + old->offset -
> +                        backref->extent_offset;
> +
> +       len = min(backref->extent_offset + backref->num_bytes,
> +                 old->extent_offset + old->offset + old->len);
> +       len -= max(backref->extent_offset, old->extent_offset + old->offset);
> +
> +       ret = btrfs_drop_extents(trans, root, inode, start,
> +                                start + len, 1);
> +       if (ret)
> +               goto out_free_path;
> +again:
> +       key.objectid = btrfs_ino(inode);
> +       key.type = BTRFS_EXTENT_DATA_KEY;
> +       key.offset = start;
> +
> +       if (merge) {
> +               struct btrfs_file_extent_item *fi;
> +               u64 extent_len;
> +               struct btrfs_key found_key;
> +
> +               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
> +               if (ret < 0)
> +                       goto out_free_path;
> +
> +               path->slots[0]--;
> +               leaf = path->nodes[0];
> +               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
> +
> +               fi = btrfs_item_ptr(leaf, path->slots[0],
> +                                   struct btrfs_file_extent_item);
> +               extent_len = btrfs_file_extent_num_bytes(leaf, fi);
> +
> +               if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
> +                   btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
> +                   !btrfs_file_extent_compression(leaf, fi) &&
> +                   !btrfs_file_extent_encryption(leaf, fi) &&
> +                   !btrfs_file_extent_other_encoding(leaf, fi) &&
> +                   extent_len + found_key.offset == start) {
> +                       btrfs_set_file_extent_num_bytes(leaf, fi,
> +                                                       extent_len + len);
> +                       btrfs_mark_buffer_dirty(leaf);
> +                       inode_add_bytes(inode, len);
> +
> +                       ret = 1;
> +                       goto out_free_path;
> +               } else {
> +                       merge = false;
> +                       btrfs_release_path(path);
> +                       goto again;
> +               }
> +       }
> +
> +       ret = btrfs_insert_empty_item(trans, root, path, &key,
> +                                       sizeof(*extent));
> +       if (ret) {
> +               btrfs_abort_transaction(trans, root, ret);
> +               goto out_free_path;
> +       }
> +
> +       leaf = path->nodes[0];
> +       item = btrfs_item_ptr(leaf, path->slots[0],
> +                               struct btrfs_file_extent_item);
> +       btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
> +       btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
> +       btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
> +       btrfs_set_file_extent_num_bytes(leaf, item, len);
> +       btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
> +       btrfs_set_file_extent_generation(leaf, item, trans->transid);
> +       btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
> +       btrfs_set_file_extent_compression(leaf, item, new->compress_type);
> +       btrfs_set_file_extent_encryption(leaf, item, 0);
> +       btrfs_set_file_extent_other_encoding(leaf, item, 0);
> +
> +       btrfs_mark_buffer_dirty(leaf);
> +       inode_add_bytes(inode, len);
> +
> +       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
> +                       new->disk_len, 0,
> +                       backref->root_id, backref->inum,
> +                       new->file_pos, 0);      /* start - extent_offset */
> +       if (ret) {
> +               btrfs_abort_transaction(trans, root, ret);
> +               goto out_free_path;
> +       }
> +
> +       ret = 1;
> +out_free_path:
> +       btrfs_release_path(path);
> +       btrfs_end_transaction(trans, root);
> +out_unlock:
> +       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
> +                            &cached, GFP_NOFS);
> +       iput(inode);
> +       return ret;
> +}
> +
> +static void relink_file_extents(struct new_sa_defrag_extent *new)
> +{
> +       struct btrfs_path *path;
> +       struct old_sa_defrag_extent *old, *tmp;
> +       struct sa_defrag_extent_backref *backref;
> +       struct sa_defrag_extent_backref *prev = NULL;
> +       struct inode *inode;
> +       struct btrfs_root *root;
> +       struct rb_node *node;
> +       int ret;
> +
> +       inode = new->inode;
> +       root = BTRFS_I(inode)->root;
> +
> +       path = btrfs_alloc_path();
> +       if (!path)
> +               return;
> +
> +       if (!record_extent_backrefs(path, new)) {
> +               btrfs_free_path(path);
> +               goto out;
> +       }
> +       btrfs_release_path(path);
> +
> +       while (1) {
> +               node = rb_first(&new->root);
> +               if (!node)
> +                       break;
> +               rb_erase(node, &new->root);
> +
> +               backref = rb_entry(node, struct sa_defrag_extent_backref, node);
> +
> +               ret = relink_extent_backref(path, prev, backref);
> +               WARN_ON(ret < 0);
> +
> +               kfree(prev);
> +
> +               if (ret == 1)
> +                       prev = backref;
> +               else
> +                       prev = NULL;
> +               cond_resched();
> +       }
> +       kfree(prev);
> +
> +       btrfs_free_path(path);
> +
> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> +               list_del(&old->list);
> +               kfree(old);
> +       }
> +out:
> +       atomic_dec(&root->fs_info->defrag_running);
> +       wake_up(&root->fs_info->transaction_wait);
> +
> +       kfree(new);
> +}
> +
> +static struct new_sa_defrag_extent *
> +record_old_file_extents(struct inode *inode,
> +                       struct btrfs_ordered_extent *ordered)
> +{
> +       struct btrfs_root *root = BTRFS_I(inode)->root;
> +       struct btrfs_path *path;
> +       struct btrfs_key key;
> +       struct old_sa_defrag_extent *old, *tmp;
> +       struct new_sa_defrag_extent *new;
> +       int ret;
> +
> +       new = kmalloc(sizeof(*new), GFP_NOFS);
> +       if (!new)
> +               return NULL;
> +
> +       new->inode = inode;
> +       new->file_pos = ordered->file_offset;
> +       new->len = ordered->len;
> +       new->bytenr = ordered->start;
> +       new->disk_len = ordered->disk_len;
> +       new->compress_type = ordered->compress_type;
> +       new->root = RB_ROOT;
> +       INIT_LIST_HEAD(&new->head);
> +
> +       path = btrfs_alloc_path();
> +       if (!path)
> +               goto out_kfree;
> +
> +       key.objectid = btrfs_ino(inode);
> +       key.type = BTRFS_EXTENT_DATA_KEY;
> +       key.offset = new->file_pos;
> +
> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> +       if (ret < 0)
> +               goto out_free_path;
> +       if (ret > 0 && path->slots[0] > 0)
> +               path->slots[0]--;
> +
> +       /* find out all the old extents for the file range */
> +       while (1) {
> +               struct btrfs_file_extent_item *extent;
> +               struct extent_buffer *l;
> +               int slot;
> +               u64 num_bytes;
> +               u64 offset;
> +               u64 end;
> +
> +               l = path->nodes[0];
> +               slot = path->slots[0];
> +
> +               if (slot >= btrfs_header_nritems(l)) {
> +                       ret = btrfs_next_leaf(root, path);
> +                       if (ret < 0)
> +                               goto out_free_list;
> +                       else if (ret > 0)
> +                               break;
> +                       continue;
> +               }
> +
> +               btrfs_item_key_to_cpu(l, &key, slot);
> +
> +               if (key.objectid != btrfs_ino(inode))
> +                       break;
> +               if (key.type != BTRFS_EXTENT_DATA_KEY)
> +                       break;
> +               if (key.offset >= new->file_pos + new->len)
> +                       break;
> +
> +               extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
> +
> +               num_bytes = btrfs_file_extent_num_bytes(l, extent);
> +               if (key.offset + num_bytes < new->file_pos)
> +                       goto next;
> +
> +               old = kmalloc(sizeof(*old), GFP_NOFS);
> +               if (!old)
> +                       goto out_free_list;
> +
> +               offset = max(new->file_pos, key.offset);
> +               end = min(new->file_pos + new->len, key.offset + num_bytes);
> +
> +               old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
> +               BUG_ON(!old->bytenr);
> +               old->extent_offset = btrfs_file_extent_offset(l, extent);
> +               old->offset = offset - key.offset;
> +               old->len = end - offset;
> +               old->new = new;
> +               old->count = 0;
> +               list_add_tail(&old->list, &new->head);
> +next:
> +               path->slots[0]++;
> +               cond_resched();
> +       }
> +
> +       btrfs_free_path(path);
> +       atomic_inc(&root->fs_info->defrag_running);
> +
> +       return new;
> +
> +out_free_list:
> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> +               list_del(&old->list);
> +               kfree(old);
> +       }
> +out_free_path:
> +       btrfs_free_path(path);
> +out_kfree:
> +       kfree(new);
> +       return NULL;
> +}
> +
>  /*
>   * helper function for btrfs_finish_ordered_io, this
>   * just reads in some of the csum leaves to prime them into ram
> @@ -1856,6 +2451,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>         struct btrfs_trans_handle *trans = NULL;
>         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
>         struct extent_state *cached_state = NULL;
> +       struct new_sa_defrag_extent *new = NULL;
>         int compress_type = 0;
>         int ret;
>         bool nolock;
> @@ -1892,6 +2488,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>                          ordered_extent->file_offset + ordered_extent->len - 1,
>                          0, &cached_state);
>
> +       ret = test_range_bit(io_tree, ordered_extent->file_offset,
> +                       ordered_extent->file_offset + ordered_extent->len - 1,
> +                       EXTENT_DEFRAG, 1, cached_state);
> +       if (ret && btrfs_root_last_snapshot(&root->root_item) >=
> +                                               BTRFS_I(inode)->generation) {
> +               /* the inode is shared */
> +               new = record_old_file_extents(inode, ordered_extent);
> +       }
> +
>         if (nolock)
>                 trans = btrfs_join_transaction_nolock(root);
>         else
> @@ -1965,6 +2570,10 @@ out:
>          */
>         btrfs_remove_ordered_extent(inode, ordered_extent);
>
> +       /* for snapshot-aware defrag */
> +       if (new)
> +               relink_file_extents(new);
> +
>         /* once for us */
>         btrfs_put_ordered_extent(ordered_extent);
>         /* once for the tree */
> --
> 1.7.7.6
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-31 12:13 ` Itaru Kitayama
@ 2012-10-31 12:55   ` Liu Bo
  2012-11-01 11:08     ` Itaru Kitayama
  0 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2012-10-31 12:55 UTC (permalink / raw)
  To: Itaru Kitayama; +Cc: linux-btrfs

On 10/31/2012 08:13 PM, Itaru Kitayama wrote:
> Hi LiuBo:
> 
> I am seeing another warning with your patch applied btrfs-next.
> 

Hi Itaru,

Thanks for testing, you seems to be using an old version, since in the new version
record_extent_backrefs() does not own a WARN_ON().

Could you please test it again with the new patches applied?

thanks,
liubo


> [ 5224.531560] ------------[ cut here ]------------
> [ 5224.531565] WARNING: at fs/btrfs/inode.c:2054
> record_extent_backrefs+0x87/0xe0()
> [ 5224.531567] Hardware name: Bochs
> [ 5224.531568] Modules linked in: microcode ppdev psmouse nfsd nfs_acl
> auth_rpcgss serio_raw nfs fscache lockd binfmt_misc sunrpc cirrus
> parport_pc ttm drm_kms_helper drm sysimgblt i2c_piix4 sysfillrect
> syscopyarea i2c_core lp parport floppy
> [ 5224.531591] Pid: 2485, comm: btrfs-endio-wri Tainted: G        W
> 3.7.0-rc1-v11+ #53
> [ 5224.531592] Call Trace:
> [ 5224.531598]  [<ffffffff81061c63>] warn_slowpath_common+0x93/0xc0
> [ 5224.531600]  [<ffffffff81061caa>] warn_slowpath_null+0x1a/0x20
> [ 5224.531603]  [<ffffffff81322287>] record_extent_backrefs+0x87/0xe0
> [ 5224.531606]  [<ffffffff8132d10b>] btrfs_finish_ordered_io+0x8bb/0xa80
> [ 5224.531611]  [<ffffffff810ce300>] ? trace_hardirqs_off_caller+0xb0/0x140
> [ 5224.531614]  [<ffffffff8132d2e5>] finish_ordered_fn+0x15/0x20
> [ 5224.531617]  [<ffffffff8134beb7>] worker_loop+0x157/0x580
> [ 5224.531620]  [<ffffffff8134bd60>] ? btrfs_queue_worker+0x2f0/0x2f0
> [ 5224.531624]  [<ffffffff81090aa8>] kthread+0xe8/0xf0
> [ 5224.531627]  [<ffffffff810ce3c2>] ? get_lock_stats+0x22/0x70
> [ 5224.531630]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
> [ 5224.531634]  [<ffffffff817c1c6c>] ret_from_fork+0x7c/0xb0
> [ 5224.531636]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
> [ 5224.531638] ---[ end trace 0256d2b5a195208c ]---
> 
> I've compared some of the old extents logical addresses with the corresponding
> object ids and offsets from the extent tree; some are just 8k off from
> the found extents
> and some keys are totally off.
> 
> Itaru
> 
> On Sat, Oct 27, 2012 at 7:28 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
>> This comes from one of btrfs's project ideas,
>> As we defragment files, we break any sharing from other snapshots.
>> The balancing code will preserve the sharing, and defrag needs to grow this
>> as well.
>>
>> Now we're able to fill the blank with this patch, in which we make full use of
>> backref walking stuff.
>>
>> Here is the basic idea,
>> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>> o  at endio, after we finish updating fs tree, we use backref walking to find
>>    all parents of the ranges and re-link them with the new COWed file layout by
>>    adding corresponding backrefs.
>>
>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>> ---
>> v3->v4:
>>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>>         for the bug report from Mitch and Chris.
>>
>>  fs/btrfs/inode.c |  609 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 files changed, 609 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> index 85a1e50..35e6993 100644
>> --- a/fs/btrfs/inode.c
>> +++ b/fs/btrfs/inode.c
>> @@ -54,6 +54,7 @@
>>  #include "locking.h"
>>  #include "free-space-cache.h"
>>  #include "inode-map.h"
>> +#include "backref.h"
>>
>>  struct btrfs_iget_args {
>>         u64 ino;
>> @@ -1839,6 +1840,600 @@ out:
>>         return ret;
>>  }
>>
>> +/* snapshot-aware defrag */
>> +struct sa_defrag_extent_backref {
>> +       struct rb_node node;
>> +       struct old_sa_defrag_extent *old;
>> +       u64 root_id;
>> +       u64 inum;
>> +       u64 file_pos;
>> +       u64 extent_offset;
>> +       u64 num_bytes;
>> +       u64 generation;
>> +};
>> +
>> +struct old_sa_defrag_extent {
>> +       struct list_head list;
>> +       struct new_sa_defrag_extent *new;
>> +
>> +       u64 extent_offset;
>> +       u64 bytenr;
>> +       u64 offset;
>> +       u64 len;
>> +       int count;
>> +};
>> +
>> +struct new_sa_defrag_extent {
>> +       struct rb_root root;
>> +       struct list_head head;
>> +       struct btrfs_path *path;
>> +       struct inode *inode;
>> +       u64 file_pos;
>> +       u64 len;
>> +       u64 bytenr;
>> +       u64 disk_len;
>> +       u8 compress_type;
>> +};
>> +
>> +static int backref_comp(struct sa_defrag_extent_backref *b1,
>> +                       struct sa_defrag_extent_backref *b2)
>> +{
>> +       if (b1->root_id < b2->root_id)
>> +               return -1;
>> +       else if (b1->root_id > b2->root_id)
>> +               return 1;
>> +
>> +       if (b1->inum < b2->inum)
>> +               return -1;
>> +       else if (b1->inum > b2->inum)
>> +               return 1;
>> +
>> +       if (b1->file_pos < b2->file_pos)
>> +               return -1;
>> +       else if (b1->file_pos > b2->file_pos)
>> +               return 1;
>> +
>> +       return 0;
>> +}
>> +
>> +static void backref_insert(struct rb_root *root,
>> +                          struct sa_defrag_extent_backref *backref)
>> +{
>> +       struct rb_node **p = &root->rb_node;
>> +       struct rb_node *parent = NULL;
>> +       struct sa_defrag_extent_backref *entry;
>> +       int ret;
>> +
>> +       while (*p) {
>> +               parent = *p;
>> +               entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
>> +
>> +               ret = backref_comp(backref, entry);
>> +               if (ret < 0)
>> +                       p = &(*p)->rb_left;
>> +               else
>> +                       /*
>> +                        * Since space can be shared, so there can be
>> +                        * some backrefs(extent tree to fs/file tree)
>> +                        * whoes fs/file extents map to the same address.
>> +                        * If so, we just put it after what we've found.
>> +                        */
>> +                       p = &(*p)->rb_right;
>> +       }
>> +
>> +       rb_link_node(&backref->node, parent, p);
>> +       rb_insert_color(&backref->node, root);
>> +}
>> +
>> +/*
>> + * Note the backref might has changed, and in this case we just return 0.
>> + */
>> +static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
>> +                                      void *ctx)
>> +{
>> +       struct btrfs_file_extent_item *extent;
>> +       struct btrfs_fs_info *fs_info;
>> +       struct old_sa_defrag_extent *old = ctx;
>> +       struct new_sa_defrag_extent *new = old->new;
>> +       struct btrfs_path *path = new->path;
>> +       struct btrfs_key key;
>> +       struct btrfs_root *root;
>> +       struct sa_defrag_extent_backref *backref;
>> +       struct extent_buffer *leaf;
>> +       struct inode *inode = new->inode;
>> +       int slot;
>> +       int ret;
>> +       u64 extent_offset;
>> +       u64 num_bytes;
>> +
>> +       if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
>> +           inum == btrfs_ino(inode))
>> +               return 0;
>> +
>> +       key.objectid = root_id;
>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>> +       key.offset = (u64)-1;
>> +
>> +       fs_info = BTRFS_I(inode)->root->fs_info;
>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
>> +       if (IS_ERR(root)) {
>> +               if (PTR_ERR(root) == -ENOENT)
>> +                       return 0;
>> +               WARN_ON(1);
>> +               pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
>> +                        inum, offset, root_id);
>> +               return PTR_ERR(root);
>> +       }
>> +
>> +       key.objectid = inum;
>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>> +       if (offset > (u64)-1 << 32)
>> +               key.offset = 0;
>> +       else
>> +               key.offset = offset;
>> +
>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>> +       if (ret < 0) {
>> +               WARN_ON(1);
>> +               return ret;
>> +       }
>> +
>> +       while (1) {
>> +               cond_resched();
>> +
>> +               leaf = path->nodes[0];
>> +               slot = path->slots[0];
>> +
>> +               if (slot >= btrfs_header_nritems(leaf)) {
>> +                       ret = btrfs_next_leaf(root, path);
>> +                       if (ret < 0) {
>> +                               goto out;
>> +                       } else if (ret > 0) {
>> +                               ret = 0;
>> +                               goto out;
>> +                       }
>> +                       continue;
>> +               }
>> +
>> +               path->slots[0]++;
>> +
>> +               btrfs_item_key_to_cpu(leaf, &key, slot);
>> +
>> +               if (key.objectid > inum)
>> +                       goto out;
>> +
>> +               if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
>> +                       continue;
>> +
>> +               extent = btrfs_item_ptr(leaf, slot,
>> +                                       struct btrfs_file_extent_item);
>> +
>> +               if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
>> +                       continue;
>> +
>> +               extent_offset = btrfs_file_extent_offset(leaf, extent);
>> +               if (key.offset - extent_offset != offset)
>> +                       continue;
>> +
>> +               num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
>> +               if (extent_offset >= old->extent_offset + old->offset +
>> +                   old->len || extent_offset + num_bytes <=
>> +                   old->extent_offset + old->offset)
>> +                       continue;
>> +
>> +               break;
>> +       }
>> +
>> +       backref = kmalloc(sizeof(*backref), GFP_NOFS);
>> +       if (!backref) {
>> +               ret = -ENOENT;
>> +               goto out;
>> +       }
>> +
>> +       backref->root_id = root_id;
>> +       backref->inum = inum;
>> +       backref->file_pos = offset + extent_offset;
>> +       backref->num_bytes = num_bytes;
>> +       backref->extent_offset = extent_offset;
>> +       backref->generation = btrfs_file_extent_generation(leaf, extent);
>> +       backref->old = old;
>> +       backref_insert(&new->root, backref);
>> +       old->count++;
>> +out:
>> +       btrfs_release_path(path);
>> +       WARN_ON(ret);
>> +       return ret;
>> +}
>> +
>> +static noinline bool record_extent_backrefs(struct btrfs_path *path,
>> +                                  struct new_sa_defrag_extent *new)
>> +{
>> +       struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
>> +       struct old_sa_defrag_extent *old, *tmp;
>> +       int ret;
>> +
>> +       new->path = path;
>> +
>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>> +               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
>> +                                                 path, record_one_backref,
>> +                                                 old);
>> +               BUG_ON(ret < 0 && ret != -ENOENT);
>> +
>> +               /* no backref to be processed for this extent */
>> +               if (!old->count) {
>> +                       list_del(&old->list);
>> +                       kfree(old);
>> +               }
>> +       }
>> +
>> +       if (list_empty(&new->head))
>> +               return false;
>> +
>> +       return true;
>> +}
>> +
>> +/*
>> + * Note the backref might has changed, and in this case we just return 0.
>> + */
>> +static noinline int relink_extent_backref(struct btrfs_path *path,
>> +                                struct sa_defrag_extent_backref *prev,
>> +                                struct sa_defrag_extent_backref *backref)
>> +{
>> +       struct btrfs_file_extent_item *extent;
>> +       struct btrfs_file_extent_item *item;
>> +       struct btrfs_ordered_extent *ordered;
>> +       struct btrfs_trans_handle *trans;
>> +       struct btrfs_fs_info *fs_info;
>> +       struct btrfs_root *root;
>> +       struct btrfs_key key;
>> +       struct extent_buffer *leaf;
>> +       struct old_sa_defrag_extent *old = backref->old;
>> +       struct new_sa_defrag_extent *new = old->new;
>> +       struct inode *src_inode = new->inode;
>> +       struct inode *inode;
>> +       struct extent_state *cached = NULL;
>> +       int ret = 0;
>> +       u64 start;
>> +       u64 len;
>> +       u64 lock_start;
>> +       u64 lock_end;
>> +       bool merge = false;
>> +
>> +       if (prev && prev->root_id == backref->root_id &&
>> +           prev->inum == backref->inum &&
>> +           prev->file_pos + prev->num_bytes == backref->file_pos)
>> +               merge = true;
>> +
>> +       key.objectid = backref->root_id;
>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>> +       key.offset = (u64)-1;
>> +
>> +       fs_info = BTRFS_I(src_inode)->root->fs_info;
>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
>> +       if (IS_ERR(root)) {
>> +               if (PTR_ERR(root) == -ENOENT)
>> +                       return 0;
>> +               return PTR_ERR(root);
>> +       }
>> +
>> +       key.objectid = backref->inum;
>> +       key.type = BTRFS_INODE_ITEM_KEY;
>> +       key.offset = 0;
>> +
>> +       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
>> +       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
>> +               if (inode && !IS_ERR(inode))
>> +                       iput(inode);
>> +               return 0;
>> +       }
>> +
>> +       lock_start = backref->file_pos;
>> +       lock_end = backref->file_pos + backref->num_bytes - 1;
>> +       lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
>> +                        0, &cached);
>> +
>> +       ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
>> +       if (ordered) {
>> +               btrfs_put_ordered_extent(ordered);
>> +               goto out_unlock;
>> +       }
>> +
>> +       trans = btrfs_join_transaction(root);
>> +       if (IS_ERR(trans)) {
>> +               ret = PTR_ERR(trans);
>> +               goto out_unlock;
>> +       }
>> +
>> +       key.objectid = backref->inum;
>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>> +       key.offset = backref->file_pos;
>> +
>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>> +       if (ret < 0) {
>> +               goto out_free_path;
>> +       } else if (ret > 0) {
>> +               ret = 0;
>> +               goto out_free_path;
>> +       }
>> +
>> +       extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
>> +                               struct btrfs_file_extent_item);
>> +
>> +       if (btrfs_file_extent_generation(path->nodes[0], extent) !=
>> +           backref->generation)
>> +               goto out_free_path;
>> +
>> +       btrfs_release_path(path);
>> +
>> +       start = backref->file_pos;
>> +       if (backref->extent_offset < old->extent_offset + old->offset)
>> +               start += old->extent_offset + old->offset -
>> +                        backref->extent_offset;
>> +
>> +       len = min(backref->extent_offset + backref->num_bytes,
>> +                 old->extent_offset + old->offset + old->len);
>> +       len -= max(backref->extent_offset, old->extent_offset + old->offset);
>> +
>> +       ret = btrfs_drop_extents(trans, root, inode, start,
>> +                                start + len, 1);
>> +       if (ret)
>> +               goto out_free_path;
>> +again:
>> +       key.objectid = btrfs_ino(inode);
>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>> +       key.offset = start;
>> +
>> +       if (merge) {
>> +               struct btrfs_file_extent_item *fi;
>> +               u64 extent_len;
>> +               struct btrfs_key found_key;
>> +
>> +               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
>> +               if (ret < 0)
>> +                       goto out_free_path;
>> +
>> +               path->slots[0]--;
>> +               leaf = path->nodes[0];
>> +               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
>> +
>> +               fi = btrfs_item_ptr(leaf, path->slots[0],
>> +                                   struct btrfs_file_extent_item);
>> +               extent_len = btrfs_file_extent_num_bytes(leaf, fi);
>> +
>> +               if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
>> +                   btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
>> +                   !btrfs_file_extent_compression(leaf, fi) &&
>> +                   !btrfs_file_extent_encryption(leaf, fi) &&
>> +                   !btrfs_file_extent_other_encoding(leaf, fi) &&
>> +                   extent_len + found_key.offset == start) {
>> +                       btrfs_set_file_extent_num_bytes(leaf, fi,
>> +                                                       extent_len + len);
>> +                       btrfs_mark_buffer_dirty(leaf);
>> +                       inode_add_bytes(inode, len);
>> +
>> +                       ret = 1;
>> +                       goto out_free_path;
>> +               } else {
>> +                       merge = false;
>> +                       btrfs_release_path(path);
>> +                       goto again;
>> +               }
>> +       }
>> +
>> +       ret = btrfs_insert_empty_item(trans, root, path, &key,
>> +                                       sizeof(*extent));
>> +       if (ret) {
>> +               btrfs_abort_transaction(trans, root, ret);
>> +               goto out_free_path;
>> +       }
>> +
>> +       leaf = path->nodes[0];
>> +       item = btrfs_item_ptr(leaf, path->slots[0],
>> +                               struct btrfs_file_extent_item);
>> +       btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
>> +       btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
>> +       btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
>> +       btrfs_set_file_extent_num_bytes(leaf, item, len);
>> +       btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
>> +       btrfs_set_file_extent_generation(leaf, item, trans->transid);
>> +       btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
>> +       btrfs_set_file_extent_compression(leaf, item, new->compress_type);
>> +       btrfs_set_file_extent_encryption(leaf, item, 0);
>> +       btrfs_set_file_extent_other_encoding(leaf, item, 0);
>> +
>> +       btrfs_mark_buffer_dirty(leaf);
>> +       inode_add_bytes(inode, len);
>> +
>> +       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
>> +                       new->disk_len, 0,
>> +                       backref->root_id, backref->inum,
>> +                       new->file_pos, 0);      /* start - extent_offset */
>> +       if (ret) {
>> +               btrfs_abort_transaction(trans, root, ret);
>> +               goto out_free_path;
>> +       }
>> +
>> +       ret = 1;
>> +out_free_path:
>> +       btrfs_release_path(path);
>> +       btrfs_end_transaction(trans, root);
>> +out_unlock:
>> +       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
>> +                            &cached, GFP_NOFS);
>> +       iput(inode);
>> +       return ret;
>> +}
>> +
>> +static void relink_file_extents(struct new_sa_defrag_extent *new)
>> +{
>> +       struct btrfs_path *path;
>> +       struct old_sa_defrag_extent *old, *tmp;
>> +       struct sa_defrag_extent_backref *backref;
>> +       struct sa_defrag_extent_backref *prev = NULL;
>> +       struct inode *inode;
>> +       struct btrfs_root *root;
>> +       struct rb_node *node;
>> +       int ret;
>> +
>> +       inode = new->inode;
>> +       root = BTRFS_I(inode)->root;
>> +
>> +       path = btrfs_alloc_path();
>> +       if (!path)
>> +               return;
>> +
>> +       if (!record_extent_backrefs(path, new)) {
>> +               btrfs_free_path(path);
>> +               goto out;
>> +       }
>> +       btrfs_release_path(path);
>> +
>> +       while (1) {
>> +               node = rb_first(&new->root);
>> +               if (!node)
>> +                       break;
>> +               rb_erase(node, &new->root);
>> +
>> +               backref = rb_entry(node, struct sa_defrag_extent_backref, node);
>> +
>> +               ret = relink_extent_backref(path, prev, backref);
>> +               WARN_ON(ret < 0);
>> +
>> +               kfree(prev);
>> +
>> +               if (ret == 1)
>> +                       prev = backref;
>> +               else
>> +                       prev = NULL;
>> +               cond_resched();
>> +       }
>> +       kfree(prev);
>> +
>> +       btrfs_free_path(path);
>> +
>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>> +               list_del(&old->list);
>> +               kfree(old);
>> +       }
>> +out:
>> +       atomic_dec(&root->fs_info->defrag_running);
>> +       wake_up(&root->fs_info->transaction_wait);
>> +
>> +       kfree(new);
>> +}
>> +
>> +static struct new_sa_defrag_extent *
>> +record_old_file_extents(struct inode *inode,
>> +                       struct btrfs_ordered_extent *ordered)
>> +{
>> +       struct btrfs_root *root = BTRFS_I(inode)->root;
>> +       struct btrfs_path *path;
>> +       struct btrfs_key key;
>> +       struct old_sa_defrag_extent *old, *tmp;
>> +       struct new_sa_defrag_extent *new;
>> +       int ret;
>> +
>> +       new = kmalloc(sizeof(*new), GFP_NOFS);
>> +       if (!new)
>> +               return NULL;
>> +
>> +       new->inode = inode;
>> +       new->file_pos = ordered->file_offset;
>> +       new->len = ordered->len;
>> +       new->bytenr = ordered->start;
>> +       new->disk_len = ordered->disk_len;
>> +       new->compress_type = ordered->compress_type;
>> +       new->root = RB_ROOT;
>> +       INIT_LIST_HEAD(&new->head);
>> +
>> +       path = btrfs_alloc_path();
>> +       if (!path)
>> +               goto out_kfree;
>> +
>> +       key.objectid = btrfs_ino(inode);
>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>> +       key.offset = new->file_pos;
>> +
>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>> +       if (ret < 0)
>> +               goto out_free_path;
>> +       if (ret > 0 && path->slots[0] > 0)
>> +               path->slots[0]--;
>> +
>> +       /* find out all the old extents for the file range */
>> +       while (1) {
>> +               struct btrfs_file_extent_item *extent;
>> +               struct extent_buffer *l;
>> +               int slot;
>> +               u64 num_bytes;
>> +               u64 offset;
>> +               u64 end;
>> +
>> +               l = path->nodes[0];
>> +               slot = path->slots[0];
>> +
>> +               if (slot >= btrfs_header_nritems(l)) {
>> +                       ret = btrfs_next_leaf(root, path);
>> +                       if (ret < 0)
>> +                               goto out_free_list;
>> +                       else if (ret > 0)
>> +                               break;
>> +                       continue;
>> +               }
>> +
>> +               btrfs_item_key_to_cpu(l, &key, slot);
>> +
>> +               if (key.objectid != btrfs_ino(inode))
>> +                       break;
>> +               if (key.type != BTRFS_EXTENT_DATA_KEY)
>> +                       break;
>> +               if (key.offset >= new->file_pos + new->len)
>> +                       break;
>> +
>> +               extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
>> +
>> +               num_bytes = btrfs_file_extent_num_bytes(l, extent);
>> +               if (key.offset + num_bytes < new->file_pos)
>> +                       goto next;
>> +
>> +               old = kmalloc(sizeof(*old), GFP_NOFS);
>> +               if (!old)
>> +                       goto out_free_list;
>> +
>> +               offset = max(new->file_pos, key.offset);
>> +               end = min(new->file_pos + new->len, key.offset + num_bytes);
>> +
>> +               old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
>> +               BUG_ON(!old->bytenr);
>> +               old->extent_offset = btrfs_file_extent_offset(l, extent);
>> +               old->offset = offset - key.offset;
>> +               old->len = end - offset;
>> +               old->new = new;
>> +               old->count = 0;
>> +               list_add_tail(&old->list, &new->head);
>> +next:
>> +               path->slots[0]++;
>> +               cond_resched();
>> +       }
>> +
>> +       btrfs_free_path(path);
>> +       atomic_inc(&root->fs_info->defrag_running);
>> +
>> +       return new;
>> +
>> +out_free_list:
>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>> +               list_del(&old->list);
>> +               kfree(old);
>> +       }
>> +out_free_path:
>> +       btrfs_free_path(path);
>> +out_kfree:
>> +       kfree(new);
>> +       return NULL;
>> +}
>> +
>>  /*
>>   * helper function for btrfs_finish_ordered_io, this
>>   * just reads in some of the csum leaves to prime them into ram
>> @@ -1856,6 +2451,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>>         struct btrfs_trans_handle *trans = NULL;
>>         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
>>         struct extent_state *cached_state = NULL;
>> +       struct new_sa_defrag_extent *new = NULL;
>>         int compress_type = 0;
>>         int ret;
>>         bool nolock;
>> @@ -1892,6 +2488,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>>                          ordered_extent->file_offset + ordered_extent->len - 1,
>>                          0, &cached_state);
>>
>> +       ret = test_range_bit(io_tree, ordered_extent->file_offset,
>> +                       ordered_extent->file_offset + ordered_extent->len - 1,
>> +                       EXTENT_DEFRAG, 1, cached_state);
>> +       if (ret && btrfs_root_last_snapshot(&root->root_item) >=
>> +                                               BTRFS_I(inode)->generation) {
>> +               /* the inode is shared */
>> +               new = record_old_file_extents(inode, ordered_extent);
>> +       }
>> +
>>         if (nolock)
>>                 trans = btrfs_join_transaction_nolock(root);
>>         else
>> @@ -1965,6 +2570,10 @@ out:
>>          */
>>         btrfs_remove_ordered_extent(inode, ordered_extent);
>>
>> +       /* for snapshot-aware defrag */
>> +       if (new)
>> +               relink_file_extents(new);
>> +
>>         /* once for us */
>>         btrfs_put_ordered_extent(ordered_extent);
>>         /* once for the tree */
>> --
>> 1.7.7.6
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-31  0:44       ` David Sterba
@ 2012-10-31 13:31         ` Liu Bo
  0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2012-10-31 13:31 UTC (permalink / raw)
  To: David Sterba, linux-btrfs

On 10/31/2012 08:44 AM, David Sterba wrote:
> On Wed, Oct 31, 2012 at 08:34:38AM +0800, Liu Bo wrote:
>> Besides 'btrfs fi defrag', mounting with autodefrag may also do the same thing.
> 
> Ok, autodefrag, good point. Then I suggest to make the snapshot-aware a
> mode of autodefrag, not a separate option (because it would make no
> sense other than an alias for "autodefrag=snapshotaware")
> 

Hmm, you might be right.

But I have to say 'snapshot-aware defrag' is kind of trade-off.

1. The good case:
Say a file is full of fragments and make a snapshot based on file's root

        fs root, snapshot
       /        |        \
      /         |         \
| - - | ... | - - - | ... | - - - |
   p1           p2            p3

then we do a snapshot-aware defrag, it will be

fs root      snapshot
   \          /
    \        /
| - - - - - - - - |
 a whole new extent

We achieve the goal!


2. The bad case:
Say we have a file with an whole extent and a snapshot on it at the very first:

fs root      snapshot
   \          /
    \        /
| - - - - - - - - |

then, we write into part of the file, with COW it will be:

         
| - - || - - - || - - - |  ...  | - - - |
   p1      p2       p3            p2_new

(file in snapshot -> p1 + p2 + p3)
(file in fs root -> p1 + p2_new + p3)

then, we do a snapshot-aware defrag, it will be

| - - || - - - || - - - |  ...  | - - - |
   p1      p2      p3            p2_new

                  ||
                  VV

| - - |           | - - - |  ...            ... | - - | - - - | - - - |
   p1                 p3                             new extent

(file in snapshot -> p1 + p3 + middle of new extent)
(file in fs root -> new extent)

So we're making file in snapshot worse than before, although we get a good one for file in fs root.


thanks,
liubo

> 
> david
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-10-31 12:55   ` Liu Bo
@ 2012-11-01 11:08     ` Itaru Kitayama
  2012-11-01 11:21       ` Liu Bo
  0 siblings, 1 reply; 20+ messages in thread
From: Itaru Kitayama @ 2012-11-01 11:08 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

Hi Liubo,

I couldn't apply your V4 patch against the btrfs-next HEAD. Do you have
a github branch which I can checkout?

Thanks,

Itaru

On Wed, Oct 31, 2012 at 9:55 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> On 10/31/2012 08:13 PM, Itaru Kitayama wrote:
>> Hi LiuBo:
>>
>> I am seeing another warning with your patch applied btrfs-next.
>>
>
> Hi Itaru,
>
> Thanks for testing, you seems to be using an old version, since in the new version
> record_extent_backrefs() does not own a WARN_ON().
>
> Could you please test it again with the new patches applied?
>
> thanks,
> liubo
>
>
>> [ 5224.531560] ------------[ cut here ]------------
>> [ 5224.531565] WARNING: at fs/btrfs/inode.c:2054
>> record_extent_backrefs+0x87/0xe0()
>> [ 5224.531567] Hardware name: Bochs
>> [ 5224.531568] Modules linked in: microcode ppdev psmouse nfsd nfs_acl
>> auth_rpcgss serio_raw nfs fscache lockd binfmt_misc sunrpc cirrus
>> parport_pc ttm drm_kms_helper drm sysimgblt i2c_piix4 sysfillrect
>> syscopyarea i2c_core lp parport floppy
>> [ 5224.531591] Pid: 2485, comm: btrfs-endio-wri Tainted: G        W
>> 3.7.0-rc1-v11+ #53
>> [ 5224.531592] Call Trace:
>> [ 5224.531598]  [<ffffffff81061c63>] warn_slowpath_common+0x93/0xc0
>> [ 5224.531600]  [<ffffffff81061caa>] warn_slowpath_null+0x1a/0x20
>> [ 5224.531603]  [<ffffffff81322287>] record_extent_backrefs+0x87/0xe0
>> [ 5224.531606]  [<ffffffff8132d10b>] btrfs_finish_ordered_io+0x8bb/0xa80
>> [ 5224.531611]  [<ffffffff810ce300>] ? trace_hardirqs_off_caller+0xb0/0x140
>> [ 5224.531614]  [<ffffffff8132d2e5>] finish_ordered_fn+0x15/0x20
>> [ 5224.531617]  [<ffffffff8134beb7>] worker_loop+0x157/0x580
>> [ 5224.531620]  [<ffffffff8134bd60>] ? btrfs_queue_worker+0x2f0/0x2f0
>> [ 5224.531624]  [<ffffffff81090aa8>] kthread+0xe8/0xf0
>> [ 5224.531627]  [<ffffffff810ce3c2>] ? get_lock_stats+0x22/0x70
>> [ 5224.531630]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
>> [ 5224.531634]  [<ffffffff817c1c6c>] ret_from_fork+0x7c/0xb0
>> [ 5224.531636]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
>> [ 5224.531638] ---[ end trace 0256d2b5a195208c ]---
>>
>> I've compared some of the old extents logical addresses with the corresponding
>> object ids and offsets from the extent tree; some are just 8k off from
>> the found extents
>> and some keys are totally off.
>>
>> Itaru
>>
>> On Sat, Oct 27, 2012 at 7:28 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
>>> This comes from one of btrfs's project ideas,
>>> As we defragment files, we break any sharing from other snapshots.
>>> The balancing code will preserve the sharing, and defrag needs to grow this
>>> as well.
>>>
>>> Now we're able to fill the blank with this patch, in which we make full use of
>>> backref walking stuff.
>>>
>>> Here is the basic idea,
>>> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
>>> o  at endio, after we finish updating fs tree, we use backref walking to find
>>>    all parents of the ranges and re-link them with the new COWed file layout by
>>>    adding corresponding backrefs.
>>>
>>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
>>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
>>> ---
>>> v3->v4:
>>>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
>>>         for the bug report from Mitch and Chris.
>>>
>>>  fs/btrfs/inode.c |  609 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 files changed, 609 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>>> index 85a1e50..35e6993 100644
>>> --- a/fs/btrfs/inode.c
>>> +++ b/fs/btrfs/inode.c
>>> @@ -54,6 +54,7 @@
>>>  #include "locking.h"
>>>  #include "free-space-cache.h"
>>>  #include "inode-map.h"
>>> +#include "backref.h"
>>>
>>>  struct btrfs_iget_args {
>>>         u64 ino;
>>> @@ -1839,6 +1840,600 @@ out:
>>>         return ret;
>>>  }
>>>
>>> +/* snapshot-aware defrag */
>>> +struct sa_defrag_extent_backref {
>>> +       struct rb_node node;
>>> +       struct old_sa_defrag_extent *old;
>>> +       u64 root_id;
>>> +       u64 inum;
>>> +       u64 file_pos;
>>> +       u64 extent_offset;
>>> +       u64 num_bytes;
>>> +       u64 generation;
>>> +};
>>> +
>>> +struct old_sa_defrag_extent {
>>> +       struct list_head list;
>>> +       struct new_sa_defrag_extent *new;
>>> +
>>> +       u64 extent_offset;
>>> +       u64 bytenr;
>>> +       u64 offset;
>>> +       u64 len;
>>> +       int count;
>>> +};
>>> +
>>> +struct new_sa_defrag_extent {
>>> +       struct rb_root root;
>>> +       struct list_head head;
>>> +       struct btrfs_path *path;
>>> +       struct inode *inode;
>>> +       u64 file_pos;
>>> +       u64 len;
>>> +       u64 bytenr;
>>> +       u64 disk_len;
>>> +       u8 compress_type;
>>> +};
>>> +
>>> +static int backref_comp(struct sa_defrag_extent_backref *b1,
>>> +                       struct sa_defrag_extent_backref *b2)
>>> +{
>>> +       if (b1->root_id < b2->root_id)
>>> +               return -1;
>>> +       else if (b1->root_id > b2->root_id)
>>> +               return 1;
>>> +
>>> +       if (b1->inum < b2->inum)
>>> +               return -1;
>>> +       else if (b1->inum > b2->inum)
>>> +               return 1;
>>> +
>>> +       if (b1->file_pos < b2->file_pos)
>>> +               return -1;
>>> +       else if (b1->file_pos > b2->file_pos)
>>> +               return 1;
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static void backref_insert(struct rb_root *root,
>>> +                          struct sa_defrag_extent_backref *backref)
>>> +{
>>> +       struct rb_node **p = &root->rb_node;
>>> +       struct rb_node *parent = NULL;
>>> +       struct sa_defrag_extent_backref *entry;
>>> +       int ret;
>>> +
>>> +       while (*p) {
>>> +               parent = *p;
>>> +               entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
>>> +
>>> +               ret = backref_comp(backref, entry);
>>> +               if (ret < 0)
>>> +                       p = &(*p)->rb_left;
>>> +               else
>>> +                       /*
>>> +                        * Since space can be shared, so there can be
>>> +                        * some backrefs(extent tree to fs/file tree)
>>> +                        * whoes fs/file extents map to the same address.
>>> +                        * If so, we just put it after what we've found.
>>> +                        */
>>> +                       p = &(*p)->rb_right;
>>> +       }
>>> +
>>> +       rb_link_node(&backref->node, parent, p);
>>> +       rb_insert_color(&backref->node, root);
>>> +}
>>> +
>>> +/*
>>> + * Note the backref might has changed, and in this case we just return 0.
>>> + */
>>> +static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
>>> +                                      void *ctx)
>>> +{
>>> +       struct btrfs_file_extent_item *extent;
>>> +       struct btrfs_fs_info *fs_info;
>>> +       struct old_sa_defrag_extent *old = ctx;
>>> +       struct new_sa_defrag_extent *new = old->new;
>>> +       struct btrfs_path *path = new->path;
>>> +       struct btrfs_key key;
>>> +       struct btrfs_root *root;
>>> +       struct sa_defrag_extent_backref *backref;
>>> +       struct extent_buffer *leaf;
>>> +       struct inode *inode = new->inode;
>>> +       int slot;
>>> +       int ret;
>>> +       u64 extent_offset;
>>> +       u64 num_bytes;
>>> +
>>> +       if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
>>> +           inum == btrfs_ino(inode))
>>> +               return 0;
>>> +
>>> +       key.objectid = root_id;
>>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>>> +       key.offset = (u64)-1;
>>> +
>>> +       fs_info = BTRFS_I(inode)->root->fs_info;
>>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
>>> +       if (IS_ERR(root)) {
>>> +               if (PTR_ERR(root) == -ENOENT)
>>> +                       return 0;
>>> +               WARN_ON(1);
>>> +               pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
>>> +                        inum, offset, root_id);
>>> +               return PTR_ERR(root);
>>> +       }
>>> +
>>> +       key.objectid = inum;
>>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>>> +       if (offset > (u64)-1 << 32)
>>> +               key.offset = 0;
>>> +       else
>>> +               key.offset = offset;
>>> +
>>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>>> +       if (ret < 0) {
>>> +               WARN_ON(1);
>>> +               return ret;
>>> +       }
>>> +
>>> +       while (1) {
>>> +               cond_resched();
>>> +
>>> +               leaf = path->nodes[0];
>>> +               slot = path->slots[0];
>>> +
>>> +               if (slot >= btrfs_header_nritems(leaf)) {
>>> +                       ret = btrfs_next_leaf(root, path);
>>> +                       if (ret < 0) {
>>> +                               goto out;
>>> +                       } else if (ret > 0) {
>>> +                               ret = 0;
>>> +                               goto out;
>>> +                       }
>>> +                       continue;
>>> +               }
>>> +
>>> +               path->slots[0]++;
>>> +
>>> +               btrfs_item_key_to_cpu(leaf, &key, slot);
>>> +
>>> +               if (key.objectid > inum)
>>> +                       goto out;
>>> +
>>> +               if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
>>> +                       continue;
>>> +
>>> +               extent = btrfs_item_ptr(leaf, slot,
>>> +                                       struct btrfs_file_extent_item);
>>> +
>>> +               if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
>>> +                       continue;
>>> +
>>> +               extent_offset = btrfs_file_extent_offset(leaf, extent);
>>> +               if (key.offset - extent_offset != offset)
>>> +                       continue;
>>> +
>>> +               num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
>>> +               if (extent_offset >= old->extent_offset + old->offset +
>>> +                   old->len || extent_offset + num_bytes <=
>>> +                   old->extent_offset + old->offset)
>>> +                       continue;
>>> +
>>> +               break;
>>> +       }
>>> +
>>> +       backref = kmalloc(sizeof(*backref), GFP_NOFS);
>>> +       if (!backref) {
>>> +               ret = -ENOENT;
>>> +               goto out;
>>> +       }
>>> +
>>> +       backref->root_id = root_id;
>>> +       backref->inum = inum;
>>> +       backref->file_pos = offset + extent_offset;
>>> +       backref->num_bytes = num_bytes;
>>> +       backref->extent_offset = extent_offset;
>>> +       backref->generation = btrfs_file_extent_generation(leaf, extent);
>>> +       backref->old = old;
>>> +       backref_insert(&new->root, backref);
>>> +       old->count++;
>>> +out:
>>> +       btrfs_release_path(path);
>>> +       WARN_ON(ret);
>>> +       return ret;
>>> +}
>>> +
>>> +static noinline bool record_extent_backrefs(struct btrfs_path *path,
>>> +                                  struct new_sa_defrag_extent *new)
>>> +{
>>> +       struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
>>> +       struct old_sa_defrag_extent *old, *tmp;
>>> +       int ret;
>>> +
>>> +       new->path = path;
>>> +
>>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>>> +               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
>>> +                                                 path, record_one_backref,
>>> +                                                 old);
>>> +               BUG_ON(ret < 0 && ret != -ENOENT);
>>> +
>>> +               /* no backref to be processed for this extent */
>>> +               if (!old->count) {
>>> +                       list_del(&old->list);
>>> +                       kfree(old);
>>> +               }
>>> +       }
>>> +
>>> +       if (list_empty(&new->head))
>>> +               return false;
>>> +
>>> +       return true;
>>> +}
>>> +
>>> +/*
>>> + * Note the backref might has changed, and in this case we just return 0.
>>> + */
>>> +static noinline int relink_extent_backref(struct btrfs_path *path,
>>> +                                struct sa_defrag_extent_backref *prev,
>>> +                                struct sa_defrag_extent_backref *backref)
>>> +{
>>> +       struct btrfs_file_extent_item *extent;
>>> +       struct btrfs_file_extent_item *item;
>>> +       struct btrfs_ordered_extent *ordered;
>>> +       struct btrfs_trans_handle *trans;
>>> +       struct btrfs_fs_info *fs_info;
>>> +       struct btrfs_root *root;
>>> +       struct btrfs_key key;
>>> +       struct extent_buffer *leaf;
>>> +       struct old_sa_defrag_extent *old = backref->old;
>>> +       struct new_sa_defrag_extent *new = old->new;
>>> +       struct inode *src_inode = new->inode;
>>> +       struct inode *inode;
>>> +       struct extent_state *cached = NULL;
>>> +       int ret = 0;
>>> +       u64 start;
>>> +       u64 len;
>>> +       u64 lock_start;
>>> +       u64 lock_end;
>>> +       bool merge = false;
>>> +
>>> +       if (prev && prev->root_id == backref->root_id &&
>>> +           prev->inum == backref->inum &&
>>> +           prev->file_pos + prev->num_bytes == backref->file_pos)
>>> +               merge = true;
>>> +
>>> +       key.objectid = backref->root_id;
>>> +       key.type = BTRFS_ROOT_ITEM_KEY;
>>> +       key.offset = (u64)-1;
>>> +
>>> +       fs_info = BTRFS_I(src_inode)->root->fs_info;
>>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
>>> +       if (IS_ERR(root)) {
>>> +               if (PTR_ERR(root) == -ENOENT)
>>> +                       return 0;
>>> +               return PTR_ERR(root);
>>> +       }
>>> +
>>> +       key.objectid = backref->inum;
>>> +       key.type = BTRFS_INODE_ITEM_KEY;
>>> +       key.offset = 0;
>>> +
>>> +       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
>>> +       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
>>> +               if (inode && !IS_ERR(inode))
>>> +                       iput(inode);
>>> +               return 0;
>>> +       }
>>> +
>>> +       lock_start = backref->file_pos;
>>> +       lock_end = backref->file_pos + backref->num_bytes - 1;
>>> +       lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
>>> +                        0, &cached);
>>> +
>>> +       ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
>>> +       if (ordered) {
>>> +               btrfs_put_ordered_extent(ordered);
>>> +               goto out_unlock;
>>> +       }
>>> +
>>> +       trans = btrfs_join_transaction(root);
>>> +       if (IS_ERR(trans)) {
>>> +               ret = PTR_ERR(trans);
>>> +               goto out_unlock;
>>> +       }
>>> +
>>> +       key.objectid = backref->inum;
>>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>>> +       key.offset = backref->file_pos;
>>> +
>>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>>> +       if (ret < 0) {
>>> +               goto out_free_path;
>>> +       } else if (ret > 0) {
>>> +               ret = 0;
>>> +               goto out_free_path;
>>> +       }
>>> +
>>> +       extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
>>> +                               struct btrfs_file_extent_item);
>>> +
>>> +       if (btrfs_file_extent_generation(path->nodes[0], extent) !=
>>> +           backref->generation)
>>> +               goto out_free_path;
>>> +
>>> +       btrfs_release_path(path);
>>> +
>>> +       start = backref->file_pos;
>>> +       if (backref->extent_offset < old->extent_offset + old->offset)
>>> +               start += old->extent_offset + old->offset -
>>> +                        backref->extent_offset;
>>> +
>>> +       len = min(backref->extent_offset + backref->num_bytes,
>>> +                 old->extent_offset + old->offset + old->len);
>>> +       len -= max(backref->extent_offset, old->extent_offset + old->offset);
>>> +
>>> +       ret = btrfs_drop_extents(trans, root, inode, start,
>>> +                                start + len, 1);
>>> +       if (ret)
>>> +               goto out_free_path;
>>> +again:
>>> +       key.objectid = btrfs_ino(inode);
>>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>>> +       key.offset = start;
>>> +
>>> +       if (merge) {
>>> +               struct btrfs_file_extent_item *fi;
>>> +               u64 extent_len;
>>> +               struct btrfs_key found_key;
>>> +
>>> +               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
>>> +               if (ret < 0)
>>> +                       goto out_free_path;
>>> +
>>> +               path->slots[0]--;
>>> +               leaf = path->nodes[0];
>>> +               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
>>> +
>>> +               fi = btrfs_item_ptr(leaf, path->slots[0],
>>> +                                   struct btrfs_file_extent_item);
>>> +               extent_len = btrfs_file_extent_num_bytes(leaf, fi);
>>> +
>>> +               if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
>>> +                   btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
>>> +                   !btrfs_file_extent_compression(leaf, fi) &&
>>> +                   !btrfs_file_extent_encryption(leaf, fi) &&
>>> +                   !btrfs_file_extent_other_encoding(leaf, fi) &&
>>> +                   extent_len + found_key.offset == start) {
>>> +                       btrfs_set_file_extent_num_bytes(leaf, fi,
>>> +                                                       extent_len + len);
>>> +                       btrfs_mark_buffer_dirty(leaf);
>>> +                       inode_add_bytes(inode, len);
>>> +
>>> +                       ret = 1;
>>> +                       goto out_free_path;
>>> +               } else {
>>> +                       merge = false;
>>> +                       btrfs_release_path(path);
>>> +                       goto again;
>>> +               }
>>> +       }
>>> +
>>> +       ret = btrfs_insert_empty_item(trans, root, path, &key,
>>> +                                       sizeof(*extent));
>>> +       if (ret) {
>>> +               btrfs_abort_transaction(trans, root, ret);
>>> +               goto out_free_path;
>>> +       }
>>> +
>>> +       leaf = path->nodes[0];
>>> +       item = btrfs_item_ptr(leaf, path->slots[0],
>>> +                               struct btrfs_file_extent_item);
>>> +       btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
>>> +       btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
>>> +       btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
>>> +       btrfs_set_file_extent_num_bytes(leaf, item, len);
>>> +       btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
>>> +       btrfs_set_file_extent_generation(leaf, item, trans->transid);
>>> +       btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
>>> +       btrfs_set_file_extent_compression(leaf, item, new->compress_type);
>>> +       btrfs_set_file_extent_encryption(leaf, item, 0);
>>> +       btrfs_set_file_extent_other_encoding(leaf, item, 0);
>>> +
>>> +       btrfs_mark_buffer_dirty(leaf);
>>> +       inode_add_bytes(inode, len);
>>> +
>>> +       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
>>> +                       new->disk_len, 0,
>>> +                       backref->root_id, backref->inum,
>>> +                       new->file_pos, 0);      /* start - extent_offset */
>>> +       if (ret) {
>>> +               btrfs_abort_transaction(trans, root, ret);
>>> +               goto out_free_path;
>>> +       }
>>> +
>>> +       ret = 1;
>>> +out_free_path:
>>> +       btrfs_release_path(path);
>>> +       btrfs_end_transaction(trans, root);
>>> +out_unlock:
>>> +       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
>>> +                            &cached, GFP_NOFS);
>>> +       iput(inode);
>>> +       return ret;
>>> +}
>>> +
>>> +static void relink_file_extents(struct new_sa_defrag_extent *new)
>>> +{
>>> +       struct btrfs_path *path;
>>> +       struct old_sa_defrag_extent *old, *tmp;
>>> +       struct sa_defrag_extent_backref *backref;
>>> +       struct sa_defrag_extent_backref *prev = NULL;
>>> +       struct inode *inode;
>>> +       struct btrfs_root *root;
>>> +       struct rb_node *node;
>>> +       int ret;
>>> +
>>> +       inode = new->inode;
>>> +       root = BTRFS_I(inode)->root;
>>> +
>>> +       path = btrfs_alloc_path();
>>> +       if (!path)
>>> +               return;
>>> +
>>> +       if (!record_extent_backrefs(path, new)) {
>>> +               btrfs_free_path(path);
>>> +               goto out;
>>> +       }
>>> +       btrfs_release_path(path);
>>> +
>>> +       while (1) {
>>> +               node = rb_first(&new->root);
>>> +               if (!node)
>>> +                       break;
>>> +               rb_erase(node, &new->root);
>>> +
>>> +               backref = rb_entry(node, struct sa_defrag_extent_backref, node);
>>> +
>>> +               ret = relink_extent_backref(path, prev, backref);
>>> +               WARN_ON(ret < 0);
>>> +
>>> +               kfree(prev);
>>> +
>>> +               if (ret == 1)
>>> +                       prev = backref;
>>> +               else
>>> +                       prev = NULL;
>>> +               cond_resched();
>>> +       }
>>> +       kfree(prev);
>>> +
>>> +       btrfs_free_path(path);
>>> +
>>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>>> +               list_del(&old->list);
>>> +               kfree(old);
>>> +       }
>>> +out:
>>> +       atomic_dec(&root->fs_info->defrag_running);
>>> +       wake_up(&root->fs_info->transaction_wait);
>>> +
>>> +       kfree(new);
>>> +}
>>> +
>>> +static struct new_sa_defrag_extent *
>>> +record_old_file_extents(struct inode *inode,
>>> +                       struct btrfs_ordered_extent *ordered)
>>> +{
>>> +       struct btrfs_root *root = BTRFS_I(inode)->root;
>>> +       struct btrfs_path *path;
>>> +       struct btrfs_key key;
>>> +       struct old_sa_defrag_extent *old, *tmp;
>>> +       struct new_sa_defrag_extent *new;
>>> +       int ret;
>>> +
>>> +       new = kmalloc(sizeof(*new), GFP_NOFS);
>>> +       if (!new)
>>> +               return NULL;
>>> +
>>> +       new->inode = inode;
>>> +       new->file_pos = ordered->file_offset;
>>> +       new->len = ordered->len;
>>> +       new->bytenr = ordered->start;
>>> +       new->disk_len = ordered->disk_len;
>>> +       new->compress_type = ordered->compress_type;
>>> +       new->root = RB_ROOT;
>>> +       INIT_LIST_HEAD(&new->head);
>>> +
>>> +       path = btrfs_alloc_path();
>>> +       if (!path)
>>> +               goto out_kfree;
>>> +
>>> +       key.objectid = btrfs_ino(inode);
>>> +       key.type = BTRFS_EXTENT_DATA_KEY;
>>> +       key.offset = new->file_pos;
>>> +
>>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
>>> +       if (ret < 0)
>>> +               goto out_free_path;
>>> +       if (ret > 0 && path->slots[0] > 0)
>>> +               path->slots[0]--;
>>> +
>>> +       /* find out all the old extents for the file range */
>>> +       while (1) {
>>> +               struct btrfs_file_extent_item *extent;
>>> +               struct extent_buffer *l;
>>> +               int slot;
>>> +               u64 num_bytes;
>>> +               u64 offset;
>>> +               u64 end;
>>> +
>>> +               l = path->nodes[0];
>>> +               slot = path->slots[0];
>>> +
>>> +               if (slot >= btrfs_header_nritems(l)) {
>>> +                       ret = btrfs_next_leaf(root, path);
>>> +                       if (ret < 0)
>>> +                               goto out_free_list;
>>> +                       else if (ret > 0)
>>> +                               break;
>>> +                       continue;
>>> +               }
>>> +
>>> +               btrfs_item_key_to_cpu(l, &key, slot);
>>> +
>>> +               if (key.objectid != btrfs_ino(inode))
>>> +                       break;
>>> +               if (key.type != BTRFS_EXTENT_DATA_KEY)
>>> +                       break;
>>> +               if (key.offset >= new->file_pos + new->len)
>>> +                       break;
>>> +
>>> +               extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
>>> +
>>> +               num_bytes = btrfs_file_extent_num_bytes(l, extent);
>>> +               if (key.offset + num_bytes < new->file_pos)
>>> +                       goto next;
>>> +
>>> +               old = kmalloc(sizeof(*old), GFP_NOFS);
>>> +               if (!old)
>>> +                       goto out_free_list;
>>> +
>>> +               offset = max(new->file_pos, key.offset);
>>> +               end = min(new->file_pos + new->len, key.offset + num_bytes);
>>> +
>>> +               old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
>>> +               BUG_ON(!old->bytenr);
>>> +               old->extent_offset = btrfs_file_extent_offset(l, extent);
>>> +               old->offset = offset - key.offset;
>>> +               old->len = end - offset;
>>> +               old->new = new;
>>> +               old->count = 0;
>>> +               list_add_tail(&old->list, &new->head);
>>> +next:
>>> +               path->slots[0]++;
>>> +               cond_resched();
>>> +       }
>>> +
>>> +       btrfs_free_path(path);
>>> +       atomic_inc(&root->fs_info->defrag_running);
>>> +
>>> +       return new;
>>> +
>>> +out_free_list:
>>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
>>> +               list_del(&old->list);
>>> +               kfree(old);
>>> +       }
>>> +out_free_path:
>>> +       btrfs_free_path(path);
>>> +out_kfree:
>>> +       kfree(new);
>>> +       return NULL;
>>> +}
>>> +
>>>  /*
>>>   * helper function for btrfs_finish_ordered_io, this
>>>   * just reads in some of the csum leaves to prime them into ram
>>> @@ -1856,6 +2451,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>>>         struct btrfs_trans_handle *trans = NULL;
>>>         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
>>>         struct extent_state *cached_state = NULL;
>>> +       struct new_sa_defrag_extent *new = NULL;
>>>         int compress_type = 0;
>>>         int ret;
>>>         bool nolock;
>>> @@ -1892,6 +2488,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
>>>                          ordered_extent->file_offset + ordered_extent->len - 1,
>>>                          0, &cached_state);
>>>
>>> +       ret = test_range_bit(io_tree, ordered_extent->file_offset,
>>> +                       ordered_extent->file_offset + ordered_extent->len - 1,
>>> +                       EXTENT_DEFRAG, 1, cached_state);
>>> +       if (ret && btrfs_root_last_snapshot(&root->root_item) >=
>>> +                                               BTRFS_I(inode)->generation) {
>>> +               /* the inode is shared */
>>> +               new = record_old_file_extents(inode, ordered_extent);
>>> +       }
>>> +
>>>         if (nolock)
>>>                 trans = btrfs_join_transaction_nolock(root);
>>>         else
>>> @@ -1965,6 +2570,10 @@ out:
>>>          */
>>>         btrfs_remove_ordered_extent(inode, ordered_extent);
>>>
>>> +       /* for snapshot-aware defrag */
>>> +       if (new)
>>> +               relink_file_extents(new);
>>> +
>>>         /* once for us */
>>>         btrfs_put_ordered_extent(ordered_extent);
>>>         /* once for the tree */
>>> --
>>> 1.7.7.6
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-11-01 11:08     ` Itaru Kitayama
@ 2012-11-01 11:21       ` Liu Bo
  2012-11-01 14:05         ` Itaru Kitayama
  2012-12-12 19:37         ` Mitch Harder
  0 siblings, 2 replies; 20+ messages in thread
From: Liu Bo @ 2012-11-01 11:21 UTC (permalink / raw)
  To: Itaru Kitayama; +Cc: linux-btrfs

On Thu, Nov 01, 2012 at 08:08:52PM +0900, Itaru Kitayama wrote:
> Hi Liubo,
> 
> I couldn't apply your V4 patch against the btrfs-next HEAD. Do you have
> a github branch which I can checkout?
> 

The current btrfs-next HEAD actually have included this v4 patch, so
just pull btrfs-next and give it a shot :)

thanks,
liubo

> Thanks,
> 
> Itaru
> 
> On Wed, Oct 31, 2012 at 9:55 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> > On 10/31/2012 08:13 PM, Itaru Kitayama wrote:
> >> Hi LiuBo:
> >>
> >> I am seeing another warning with your patch applied btrfs-next.
> >>
> >
> > Hi Itaru,
> >
> > Thanks for testing, you seems to be using an old version, since in the new version
> > record_extent_backrefs() does not own a WARN_ON().
> >
> > Could you please test it again with the new patches applied?
> >
> > thanks,
> > liubo
> >
> >
> >> [ 5224.531560] ------------[ cut here ]------------
> >> [ 5224.531565] WARNING: at fs/btrfs/inode.c:2054
> >> record_extent_backrefs+0x87/0xe0()
> >> [ 5224.531567] Hardware name: Bochs
> >> [ 5224.531568] Modules linked in: microcode ppdev psmouse nfsd nfs_acl
> >> auth_rpcgss serio_raw nfs fscache lockd binfmt_misc sunrpc cirrus
> >> parport_pc ttm drm_kms_helper drm sysimgblt i2c_piix4 sysfillrect
> >> syscopyarea i2c_core lp parport floppy
> >> [ 5224.531591] Pid: 2485, comm: btrfs-endio-wri Tainted: G        W
> >> 3.7.0-rc1-v11+ #53
> >> [ 5224.531592] Call Trace:
> >> [ 5224.531598]  [<ffffffff81061c63>] warn_slowpath_common+0x93/0xc0
> >> [ 5224.531600]  [<ffffffff81061caa>] warn_slowpath_null+0x1a/0x20
> >> [ 5224.531603]  [<ffffffff81322287>] record_extent_backrefs+0x87/0xe0
> >> [ 5224.531606]  [<ffffffff8132d10b>] btrfs_finish_ordered_io+0x8bb/0xa80
> >> [ 5224.531611]  [<ffffffff810ce300>] ? trace_hardirqs_off_caller+0xb0/0x140
> >> [ 5224.531614]  [<ffffffff8132d2e5>] finish_ordered_fn+0x15/0x20
> >> [ 5224.531617]  [<ffffffff8134beb7>] worker_loop+0x157/0x580
> >> [ 5224.531620]  [<ffffffff8134bd60>] ? btrfs_queue_worker+0x2f0/0x2f0
> >> [ 5224.531624]  [<ffffffff81090aa8>] kthread+0xe8/0xf0
> >> [ 5224.531627]  [<ffffffff810ce3c2>] ? get_lock_stats+0x22/0x70
> >> [ 5224.531630]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
> >> [ 5224.531634]  [<ffffffff817c1c6c>] ret_from_fork+0x7c/0xb0
> >> [ 5224.531636]  [<ffffffff810909c0>] ? kthread_create_on_node+0x160/0x160
> >> [ 5224.531638] ---[ end trace 0256d2b5a195208c ]---
> >>
> >> I've compared some of the old extents logical addresses with the corresponding
> >> object ids and offsets from the extent tree; some are just 8k off from
> >> the found extents
> >> and some keys are totally off.
> >>
> >> Itaru
> >>
> >> On Sat, Oct 27, 2012 at 7:28 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> >>> This comes from one of btrfs's project ideas,
> >>> As we defragment files, we break any sharing from other snapshots.
> >>> The balancing code will preserve the sharing, and defrag needs to grow this
> >>> as well.
> >>>
> >>> Now we're able to fill the blank with this patch, in which we make full use of
> >>> backref walking stuff.
> >>>
> >>> Here is the basic idea,
> >>> o  set the writeback ranges started by defragment with flag EXTENT_DEFRAG
> >>> o  at endio, after we finish updating fs tree, we use backref walking to find
> >>>    all parents of the ranges and re-link them with the new COWed file layout by
> >>>    adding corresponding backrefs.
> >>>
> >>> Originally patch by Li Zefan <lizf@cn.fujitsu.com>
> >>> Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
> >>> ---
> >>> v3->v4:
> >>>       - fix duplicated refs bugs detected by mounting with autodefrag, thanks
> >>>         for the bug report from Mitch and Chris.
> >>>
> >>>  fs/btrfs/inode.c |  609 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 files changed, 609 insertions(+), 0 deletions(-)
> >>>
> >>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> >>> index 85a1e50..35e6993 100644
> >>> --- a/fs/btrfs/inode.c
> >>> +++ b/fs/btrfs/inode.c
> >>> @@ -54,6 +54,7 @@
> >>>  #include "locking.h"
> >>>  #include "free-space-cache.h"
> >>>  #include "inode-map.h"
> >>> +#include "backref.h"
> >>>
> >>>  struct btrfs_iget_args {
> >>>         u64 ino;
> >>> @@ -1839,6 +1840,600 @@ out:
> >>>         return ret;
> >>>  }
> >>>
> >>> +/* snapshot-aware defrag */
> >>> +struct sa_defrag_extent_backref {
> >>> +       struct rb_node node;
> >>> +       struct old_sa_defrag_extent *old;
> >>> +       u64 root_id;
> >>> +       u64 inum;
> >>> +       u64 file_pos;
> >>> +       u64 extent_offset;
> >>> +       u64 num_bytes;
> >>> +       u64 generation;
> >>> +};
> >>> +
> >>> +struct old_sa_defrag_extent {
> >>> +       struct list_head list;
> >>> +       struct new_sa_defrag_extent *new;
> >>> +
> >>> +       u64 extent_offset;
> >>> +       u64 bytenr;
> >>> +       u64 offset;
> >>> +       u64 len;
> >>> +       int count;
> >>> +};
> >>> +
> >>> +struct new_sa_defrag_extent {
> >>> +       struct rb_root root;
> >>> +       struct list_head head;
> >>> +       struct btrfs_path *path;
> >>> +       struct inode *inode;
> >>> +       u64 file_pos;
> >>> +       u64 len;
> >>> +       u64 bytenr;
> >>> +       u64 disk_len;
> >>> +       u8 compress_type;
> >>> +};
> >>> +
> >>> +static int backref_comp(struct sa_defrag_extent_backref *b1,
> >>> +                       struct sa_defrag_extent_backref *b2)
> >>> +{
> >>> +       if (b1->root_id < b2->root_id)
> >>> +               return -1;
> >>> +       else if (b1->root_id > b2->root_id)
> >>> +               return 1;
> >>> +
> >>> +       if (b1->inum < b2->inum)
> >>> +               return -1;
> >>> +       else if (b1->inum > b2->inum)
> >>> +               return 1;
> >>> +
> >>> +       if (b1->file_pos < b2->file_pos)
> >>> +               return -1;
> >>> +       else if (b1->file_pos > b2->file_pos)
> >>> +               return 1;
> >>> +
> >>> +       return 0;
> >>> +}
> >>> +
> >>> +static void backref_insert(struct rb_root *root,
> >>> +                          struct sa_defrag_extent_backref *backref)
> >>> +{
> >>> +       struct rb_node **p = &root->rb_node;
> >>> +       struct rb_node *parent = NULL;
> >>> +       struct sa_defrag_extent_backref *entry;
> >>> +       int ret;
> >>> +
> >>> +       while (*p) {
> >>> +               parent = *p;
> >>> +               entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
> >>> +
> >>> +               ret = backref_comp(backref, entry);
> >>> +               if (ret < 0)
> >>> +                       p = &(*p)->rb_left;
> >>> +               else
> >>> +                       /*
> >>> +                        * Since space can be shared, so there can be
> >>> +                        * some backrefs(extent tree to fs/file tree)
> >>> +                        * whoes fs/file extents map to the same address.
> >>> +                        * If so, we just put it after what we've found.
> >>> +                        */
> >>> +                       p = &(*p)->rb_right;
> >>> +       }
> >>> +
> >>> +       rb_link_node(&backref->node, parent, p);
> >>> +       rb_insert_color(&backref->node, root);
> >>> +}
> >>> +
> >>> +/*
> >>> + * Note the backref might has changed, and in this case we just return 0.
> >>> + */
> >>> +static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
> >>> +                                      void *ctx)
> >>> +{
> >>> +       struct btrfs_file_extent_item *extent;
> >>> +       struct btrfs_fs_info *fs_info;
> >>> +       struct old_sa_defrag_extent *old = ctx;
> >>> +       struct new_sa_defrag_extent *new = old->new;
> >>> +       struct btrfs_path *path = new->path;
> >>> +       struct btrfs_key key;
> >>> +       struct btrfs_root *root;
> >>> +       struct sa_defrag_extent_backref *backref;
> >>> +       struct extent_buffer *leaf;
> >>> +       struct inode *inode = new->inode;
> >>> +       int slot;
> >>> +       int ret;
> >>> +       u64 extent_offset;
> >>> +       u64 num_bytes;
> >>> +
> >>> +       if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
> >>> +           inum == btrfs_ino(inode))
> >>> +               return 0;
> >>> +
> >>> +       key.objectid = root_id;
> >>> +       key.type = BTRFS_ROOT_ITEM_KEY;
> >>> +       key.offset = (u64)-1;
> >>> +
> >>> +       fs_info = BTRFS_I(inode)->root->fs_info;
> >>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
> >>> +       if (IS_ERR(root)) {
> >>> +               if (PTR_ERR(root) == -ENOENT)
> >>> +                       return 0;
> >>> +               WARN_ON(1);
> >>> +               pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
> >>> +                        inum, offset, root_id);
> >>> +               return PTR_ERR(root);
> >>> +       }
> >>> +
> >>> +       key.objectid = inum;
> >>> +       key.type = BTRFS_EXTENT_DATA_KEY;
> >>> +       if (offset > (u64)-1 << 32)
> >>> +               key.offset = 0;
> >>> +       else
> >>> +               key.offset = offset;
> >>> +
> >>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> >>> +       if (ret < 0) {
> >>> +               WARN_ON(1);
> >>> +               return ret;
> >>> +       }
> >>> +
> >>> +       while (1) {
> >>> +               cond_resched();
> >>> +
> >>> +               leaf = path->nodes[0];
> >>> +               slot = path->slots[0];
> >>> +
> >>> +               if (slot >= btrfs_header_nritems(leaf)) {
> >>> +                       ret = btrfs_next_leaf(root, path);
> >>> +                       if (ret < 0) {
> >>> +                               goto out;
> >>> +                       } else if (ret > 0) {
> >>> +                               ret = 0;
> >>> +                               goto out;
> >>> +                       }
> >>> +                       continue;
> >>> +               }
> >>> +
> >>> +               path->slots[0]++;
> >>> +
> >>> +               btrfs_item_key_to_cpu(leaf, &key, slot);
> >>> +
> >>> +               if (key.objectid > inum)
> >>> +                       goto out;
> >>> +
> >>> +               if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
> >>> +                       continue;
> >>> +
> >>> +               extent = btrfs_item_ptr(leaf, slot,
> >>> +                                       struct btrfs_file_extent_item);
> >>> +
> >>> +               if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
> >>> +                       continue;
> >>> +
> >>> +               extent_offset = btrfs_file_extent_offset(leaf, extent);
> >>> +               if (key.offset - extent_offset != offset)
> >>> +                       continue;
> >>> +
> >>> +               num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
> >>> +               if (extent_offset >= old->extent_offset + old->offset +
> >>> +                   old->len || extent_offset + num_bytes <=
> >>> +                   old->extent_offset + old->offset)
> >>> +                       continue;
> >>> +
> >>> +               break;
> >>> +       }
> >>> +
> >>> +       backref = kmalloc(sizeof(*backref), GFP_NOFS);
> >>> +       if (!backref) {
> >>> +               ret = -ENOENT;
> >>> +               goto out;
> >>> +       }
> >>> +
> >>> +       backref->root_id = root_id;
> >>> +       backref->inum = inum;
> >>> +       backref->file_pos = offset + extent_offset;
> >>> +       backref->num_bytes = num_bytes;
> >>> +       backref->extent_offset = extent_offset;
> >>> +       backref->generation = btrfs_file_extent_generation(leaf, extent);
> >>> +       backref->old = old;
> >>> +       backref_insert(&new->root, backref);
> >>> +       old->count++;
> >>> +out:
> >>> +       btrfs_release_path(path);
> >>> +       WARN_ON(ret);
> >>> +       return ret;
> >>> +}
> >>> +
> >>> +static noinline bool record_extent_backrefs(struct btrfs_path *path,
> >>> +                                  struct new_sa_defrag_extent *new)
> >>> +{
> >>> +       struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
> >>> +       struct old_sa_defrag_extent *old, *tmp;
> >>> +       int ret;
> >>> +
> >>> +       new->path = path;
> >>> +
> >>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> >>> +               ret = iterate_inodes_from_logical(old->bytenr, fs_info,
> >>> +                                                 path, record_one_backref,
> >>> +                                                 old);
> >>> +               BUG_ON(ret < 0 && ret != -ENOENT);
> >>> +
> >>> +               /* no backref to be processed for this extent */
> >>> +               if (!old->count) {
> >>> +                       list_del(&old->list);
> >>> +                       kfree(old);
> >>> +               }
> >>> +       }
> >>> +
> >>> +       if (list_empty(&new->head))
> >>> +               return false;
> >>> +
> >>> +       return true;
> >>> +}
> >>> +
> >>> +/*
> >>> + * Note the backref might has changed, and in this case we just return 0.
> >>> + */
> >>> +static noinline int relink_extent_backref(struct btrfs_path *path,
> >>> +                                struct sa_defrag_extent_backref *prev,
> >>> +                                struct sa_defrag_extent_backref *backref)
> >>> +{
> >>> +       struct btrfs_file_extent_item *extent;
> >>> +       struct btrfs_file_extent_item *item;
> >>> +       struct btrfs_ordered_extent *ordered;
> >>> +       struct btrfs_trans_handle *trans;
> >>> +       struct btrfs_fs_info *fs_info;
> >>> +       struct btrfs_root *root;
> >>> +       struct btrfs_key key;
> >>> +       struct extent_buffer *leaf;
> >>> +       struct old_sa_defrag_extent *old = backref->old;
> >>> +       struct new_sa_defrag_extent *new = old->new;
> >>> +       struct inode *src_inode = new->inode;
> >>> +       struct inode *inode;
> >>> +       struct extent_state *cached = NULL;
> >>> +       int ret = 0;
> >>> +       u64 start;
> >>> +       u64 len;
> >>> +       u64 lock_start;
> >>> +       u64 lock_end;
> >>> +       bool merge = false;
> >>> +
> >>> +       if (prev && prev->root_id == backref->root_id &&
> >>> +           prev->inum == backref->inum &&
> >>> +           prev->file_pos + prev->num_bytes == backref->file_pos)
> >>> +               merge = true;
> >>> +
> >>> +       key.objectid = backref->root_id;
> >>> +       key.type = BTRFS_ROOT_ITEM_KEY;
> >>> +       key.offset = (u64)-1;
> >>> +
> >>> +       fs_info = BTRFS_I(src_inode)->root->fs_info;
> >>> +       root = btrfs_read_fs_root_no_name(fs_info, &key);
> >>> +       if (IS_ERR(root)) {
> >>> +               if (PTR_ERR(root) == -ENOENT)
> >>> +                       return 0;
> >>> +               return PTR_ERR(root);
> >>> +       }
> >>> +
> >>> +       key.objectid = backref->inum;
> >>> +       key.type = BTRFS_INODE_ITEM_KEY;
> >>> +       key.offset = 0;
> >>> +
> >>> +       inode = btrfs_iget(fs_info->sb, &key, root, NULL);
> >>> +       if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) {
> >>> +               if (inode && !IS_ERR(inode))
> >>> +                       iput(inode);
> >>> +               return 0;
> >>> +       }
> >>> +
> >>> +       lock_start = backref->file_pos;
> >>> +       lock_end = backref->file_pos + backref->num_bytes - 1;
> >>> +       lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
> >>> +                        0, &cached);
> >>> +
> >>> +       ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
> >>> +       if (ordered) {
> >>> +               btrfs_put_ordered_extent(ordered);
> >>> +               goto out_unlock;
> >>> +       }
> >>> +
> >>> +       trans = btrfs_join_transaction(root);
> >>> +       if (IS_ERR(trans)) {
> >>> +               ret = PTR_ERR(trans);
> >>> +               goto out_unlock;
> >>> +       }
> >>> +
> >>> +       key.objectid = backref->inum;
> >>> +       key.type = BTRFS_EXTENT_DATA_KEY;
> >>> +       key.offset = backref->file_pos;
> >>> +
> >>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> >>> +       if (ret < 0) {
> >>> +               goto out_free_path;
> >>> +       } else if (ret > 0) {
> >>> +               ret = 0;
> >>> +               goto out_free_path;
> >>> +       }
> >>> +
> >>> +       extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
> >>> +                               struct btrfs_file_extent_item);
> >>> +
> >>> +       if (btrfs_file_extent_generation(path->nodes[0], extent) !=
> >>> +           backref->generation)
> >>> +               goto out_free_path;
> >>> +
> >>> +       btrfs_release_path(path);
> >>> +
> >>> +       start = backref->file_pos;
> >>> +       if (backref->extent_offset < old->extent_offset + old->offset)
> >>> +               start += old->extent_offset + old->offset -
> >>> +                        backref->extent_offset;
> >>> +
> >>> +       len = min(backref->extent_offset + backref->num_bytes,
> >>> +                 old->extent_offset + old->offset + old->len);
> >>> +       len -= max(backref->extent_offset, old->extent_offset + old->offset);
> >>> +
> >>> +       ret = btrfs_drop_extents(trans, root, inode, start,
> >>> +                                start + len, 1);
> >>> +       if (ret)
> >>> +               goto out_free_path;
> >>> +again:
> >>> +       key.objectid = btrfs_ino(inode);
> >>> +       key.type = BTRFS_EXTENT_DATA_KEY;
> >>> +       key.offset = start;
> >>> +
> >>> +       if (merge) {
> >>> +               struct btrfs_file_extent_item *fi;
> >>> +               u64 extent_len;
> >>> +               struct btrfs_key found_key;
> >>> +
> >>> +               ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
> >>> +               if (ret < 0)
> >>> +                       goto out_free_path;
> >>> +
> >>> +               path->slots[0]--;
> >>> +               leaf = path->nodes[0];
> >>> +               btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
> >>> +
> >>> +               fi = btrfs_item_ptr(leaf, path->slots[0],
> >>> +                                   struct btrfs_file_extent_item);
> >>> +               extent_len = btrfs_file_extent_num_bytes(leaf, fi);
> >>> +
> >>> +               if (btrfs_file_extent_disk_bytenr(leaf, fi) == new->bytenr &&
> >>> +                   btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_REG &&
> >>> +                   !btrfs_file_extent_compression(leaf, fi) &&
> >>> +                   !btrfs_file_extent_encryption(leaf, fi) &&
> >>> +                   !btrfs_file_extent_other_encoding(leaf, fi) &&
> >>> +                   extent_len + found_key.offset == start) {
> >>> +                       btrfs_set_file_extent_num_bytes(leaf, fi,
> >>> +                                                       extent_len + len);
> >>> +                       btrfs_mark_buffer_dirty(leaf);
> >>> +                       inode_add_bytes(inode, len);
> >>> +
> >>> +                       ret = 1;
> >>> +                       goto out_free_path;
> >>> +               } else {
> >>> +                       merge = false;
> >>> +                       btrfs_release_path(path);
> >>> +                       goto again;
> >>> +               }
> >>> +       }
> >>> +
> >>> +       ret = btrfs_insert_empty_item(trans, root, path, &key,
> >>> +                                       sizeof(*extent));
> >>> +       if (ret) {
> >>> +               btrfs_abort_transaction(trans, root, ret);
> >>> +               goto out_free_path;
> >>> +       }
> >>> +
> >>> +       leaf = path->nodes[0];
> >>> +       item = btrfs_item_ptr(leaf, path->slots[0],
> >>> +                               struct btrfs_file_extent_item);
> >>> +       btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
> >>> +       btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
> >>> +       btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
> >>> +       btrfs_set_file_extent_num_bytes(leaf, item, len);
> >>> +       btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
> >>> +       btrfs_set_file_extent_generation(leaf, item, trans->transid);
> >>> +       btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
> >>> +       btrfs_set_file_extent_compression(leaf, item, new->compress_type);
> >>> +       btrfs_set_file_extent_encryption(leaf, item, 0);
> >>> +       btrfs_set_file_extent_other_encoding(leaf, item, 0);
> >>> +
> >>> +       btrfs_mark_buffer_dirty(leaf);
> >>> +       inode_add_bytes(inode, len);
> >>> +
> >>> +       ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
> >>> +                       new->disk_len, 0,
> >>> +                       backref->root_id, backref->inum,
> >>> +                       new->file_pos, 0);      /* start - extent_offset */
> >>> +       if (ret) {
> >>> +               btrfs_abort_transaction(trans, root, ret);
> >>> +               goto out_free_path;
> >>> +       }
> >>> +
> >>> +       ret = 1;
> >>> +out_free_path:
> >>> +       btrfs_release_path(path);
> >>> +       btrfs_end_transaction(trans, root);
> >>> +out_unlock:
> >>> +       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
> >>> +                            &cached, GFP_NOFS);
> >>> +       iput(inode);
> >>> +       return ret;
> >>> +}
> >>> +
> >>> +static void relink_file_extents(struct new_sa_defrag_extent *new)
> >>> +{
> >>> +       struct btrfs_path *path;
> >>> +       struct old_sa_defrag_extent *old, *tmp;
> >>> +       struct sa_defrag_extent_backref *backref;
> >>> +       struct sa_defrag_extent_backref *prev = NULL;
> >>> +       struct inode *inode;
> >>> +       struct btrfs_root *root;
> >>> +       struct rb_node *node;
> >>> +       int ret;
> >>> +
> >>> +       inode = new->inode;
> >>> +       root = BTRFS_I(inode)->root;
> >>> +
> >>> +       path = btrfs_alloc_path();
> >>> +       if (!path)
> >>> +               return;
> >>> +
> >>> +       if (!record_extent_backrefs(path, new)) {
> >>> +               btrfs_free_path(path);
> >>> +               goto out;
> >>> +       }
> >>> +       btrfs_release_path(path);
> >>> +
> >>> +       while (1) {
> >>> +               node = rb_first(&new->root);
> >>> +               if (!node)
> >>> +                       break;
> >>> +               rb_erase(node, &new->root);
> >>> +
> >>> +               backref = rb_entry(node, struct sa_defrag_extent_backref, node);
> >>> +
> >>> +               ret = relink_extent_backref(path, prev, backref);
> >>> +               WARN_ON(ret < 0);
> >>> +
> >>> +               kfree(prev);
> >>> +
> >>> +               if (ret == 1)
> >>> +                       prev = backref;
> >>> +               else
> >>> +                       prev = NULL;
> >>> +               cond_resched();
> >>> +       }
> >>> +       kfree(prev);
> >>> +
> >>> +       btrfs_free_path(path);
> >>> +
> >>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> >>> +               list_del(&old->list);
> >>> +               kfree(old);
> >>> +       }
> >>> +out:
> >>> +       atomic_dec(&root->fs_info->defrag_running);
> >>> +       wake_up(&root->fs_info->transaction_wait);
> >>> +
> >>> +       kfree(new);
> >>> +}
> >>> +
> >>> +static struct new_sa_defrag_extent *
> >>> +record_old_file_extents(struct inode *inode,
> >>> +                       struct btrfs_ordered_extent *ordered)
> >>> +{
> >>> +       struct btrfs_root *root = BTRFS_I(inode)->root;
> >>> +       struct btrfs_path *path;
> >>> +       struct btrfs_key key;
> >>> +       struct old_sa_defrag_extent *old, *tmp;
> >>> +       struct new_sa_defrag_extent *new;
> >>> +       int ret;
> >>> +
> >>> +       new = kmalloc(sizeof(*new), GFP_NOFS);
> >>> +       if (!new)
> >>> +               return NULL;
> >>> +
> >>> +       new->inode = inode;
> >>> +       new->file_pos = ordered->file_offset;
> >>> +       new->len = ordered->len;
> >>> +       new->bytenr = ordered->start;
> >>> +       new->disk_len = ordered->disk_len;
> >>> +       new->compress_type = ordered->compress_type;
> >>> +       new->root = RB_ROOT;
> >>> +       INIT_LIST_HEAD(&new->head);
> >>> +
> >>> +       path = btrfs_alloc_path();
> >>> +       if (!path)
> >>> +               goto out_kfree;
> >>> +
> >>> +       key.objectid = btrfs_ino(inode);
> >>> +       key.type = BTRFS_EXTENT_DATA_KEY;
> >>> +       key.offset = new->file_pos;
> >>> +
> >>> +       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
> >>> +       if (ret < 0)
> >>> +               goto out_free_path;
> >>> +       if (ret > 0 && path->slots[0] > 0)
> >>> +               path->slots[0]--;
> >>> +
> >>> +       /* find out all the old extents for the file range */
> >>> +       while (1) {
> >>> +               struct btrfs_file_extent_item *extent;
> >>> +               struct extent_buffer *l;
> >>> +               int slot;
> >>> +               u64 num_bytes;
> >>> +               u64 offset;
> >>> +               u64 end;
> >>> +
> >>> +               l = path->nodes[0];
> >>> +               slot = path->slots[0];
> >>> +
> >>> +               if (slot >= btrfs_header_nritems(l)) {
> >>> +                       ret = btrfs_next_leaf(root, path);
> >>> +                       if (ret < 0)
> >>> +                               goto out_free_list;
> >>> +                       else if (ret > 0)
> >>> +                               break;
> >>> +                       continue;
> >>> +               }
> >>> +
> >>> +               btrfs_item_key_to_cpu(l, &key, slot);
> >>> +
> >>> +               if (key.objectid != btrfs_ino(inode))
> >>> +                       break;
> >>> +               if (key.type != BTRFS_EXTENT_DATA_KEY)
> >>> +                       break;
> >>> +               if (key.offset >= new->file_pos + new->len)
> >>> +                       break;
> >>> +
> >>> +               extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
> >>> +
> >>> +               num_bytes = btrfs_file_extent_num_bytes(l, extent);
> >>> +               if (key.offset + num_bytes < new->file_pos)
> >>> +                       goto next;
> >>> +
> >>> +               old = kmalloc(sizeof(*old), GFP_NOFS);
> >>> +               if (!old)
> >>> +                       goto out_free_list;
> >>> +
> >>> +               offset = max(new->file_pos, key.offset);
> >>> +               end = min(new->file_pos + new->len, key.offset + num_bytes);
> >>> +
> >>> +               old->bytenr = btrfs_file_extent_disk_bytenr(l, extent);
> >>> +               BUG_ON(!old->bytenr);
> >>> +               old->extent_offset = btrfs_file_extent_offset(l, extent);
> >>> +               old->offset = offset - key.offset;
> >>> +               old->len = end - offset;
> >>> +               old->new = new;
> >>> +               old->count = 0;
> >>> +               list_add_tail(&old->list, &new->head);
> >>> +next:
> >>> +               path->slots[0]++;
> >>> +               cond_resched();
> >>> +       }
> >>> +
> >>> +       btrfs_free_path(path);
> >>> +       atomic_inc(&root->fs_info->defrag_running);
> >>> +
> >>> +       return new;
> >>> +
> >>> +out_free_list:
> >>> +       list_for_each_entry_safe(old, tmp, &new->head, list) {
> >>> +               list_del(&old->list);
> >>> +               kfree(old);
> >>> +       }
> >>> +out_free_path:
> >>> +       btrfs_free_path(path);
> >>> +out_kfree:
> >>> +       kfree(new);
> >>> +       return NULL;
> >>> +}
> >>> +
> >>>  /*
> >>>   * helper function for btrfs_finish_ordered_io, this
> >>>   * just reads in some of the csum leaves to prime them into ram
> >>> @@ -1856,6 +2451,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
> >>>         struct btrfs_trans_handle *trans = NULL;
> >>>         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
> >>>         struct extent_state *cached_state = NULL;
> >>> +       struct new_sa_defrag_extent *new = NULL;
> >>>         int compress_type = 0;
> >>>         int ret;
> >>>         bool nolock;
> >>> @@ -1892,6 +2488,15 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
> >>>                          ordered_extent->file_offset + ordered_extent->len - 1,
> >>>                          0, &cached_state);
> >>>
> >>> +       ret = test_range_bit(io_tree, ordered_extent->file_offset,
> >>> +                       ordered_extent->file_offset + ordered_extent->len - 1,
> >>> +                       EXTENT_DEFRAG, 1, cached_state);
> >>> +       if (ret && btrfs_root_last_snapshot(&root->root_item) >=
> >>> +                                               BTRFS_I(inode)->generation) {
> >>> +               /* the inode is shared */
> >>> +               new = record_old_file_extents(inode, ordered_extent);
> >>> +       }
> >>> +
> >>>         if (nolock)
> >>>                 trans = btrfs_join_transaction_nolock(root);
> >>>         else
> >>> @@ -1965,6 +2570,10 @@ out:
> >>>          */
> >>>         btrfs_remove_ordered_extent(inode, ordered_extent);
> >>>
> >>> +       /* for snapshot-aware defrag */
> >>> +       if (new)
> >>> +               relink_file_extents(new);
> >>> +
> >>>         /* once for us */
> >>>         btrfs_put_ordered_extent(ordered_extent);
> >>>         /* once for the tree */
> >>> --
> >>> 1.7.7.6
> >>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> >>> the body of a message to majordomo@vger.kernel.org
> >>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-11-01 11:21       ` Liu Bo
@ 2012-11-01 14:05         ` Itaru Kitayama
  2012-11-01 16:01           ` Liu Bo
  2012-12-12 19:37         ` Mitch Harder
  1 sibling, 1 reply; 20+ messages in thread
From: Itaru Kitayama @ 2012-11-01 14:05 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

Hi Liubo:

The V4 leaves only warnings from btrfs_destroy_inode(). So, you think
it's normal
an "old" extent recorded can be removed from the extent tree by the time
relink_file_extents() invoked?

Itaru

On Thu, Nov 1, 2012 at 8:21 PM, Liu Bo <bo.li.liu@oracle.com> wrote:

> The current btrfs-next HEAD actually have included this v4 patch, so
> just pull btrfs-next and give it a shot :)
>
> thanks,
> liubo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
  2012-10-30 23:31   ` David Sterba
@ 2012-11-01 14:43   ` Chris Mason
  2012-11-01 15:49     ` Liu Bo
  1 sibling, 1 reply; 20+ messages in thread
From: Chris Mason @ 2012-11-01 14:43 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-btrfs

On Sat, Oct 27, 2012 at 04:28:41AM -0600, Liu Bo wrote:
> This feature works on our crucial write endio path, so if we've got
> lots of fragments to process, it will be kind of a disaster to the
> performance, so I make such a change.
> 
> One can benifit from it while mounting with '-o snap_aware_defrag'.

I think we should always prefer to maintain snapshot cloning as much as
possible, and have a specific option to defrag that makes it break the
clone in favor of removing fragmentation.

So, please keep the snapshot aware defrag the default ;)

Thanks for taking these patches up again!

-chris

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option
  2012-11-01 14:43   ` Chris Mason
@ 2012-11-01 15:49     ` Liu Bo
  0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2012-11-01 15:49 UTC (permalink / raw)
  To: Chris Mason, linux-btrfs

On 11/01/2012 10:43 PM, Chris Mason wrote:
> On Sat, Oct 27, 2012 at 04:28:41AM -0600, Liu Bo wrote:
>> This feature works on our crucial write endio path, so if we've got
>> lots of fragments to process, it will be kind of a disaster to the
>> performance, so I make such a change.
>>
>> One can benifit from it while mounting with '-o snap_aware_defrag'.
> 
> I think we should always prefer to maintain snapshot cloning as much as
> possible, and have a specific option to defrag that makes it break the
> clone in favor of removing fragmentation.
> 

Oh yeah, so I was considering the existing btrfs partitions who have already
broke the cloning relationship.

> So, please keep the snapshot aware defrag the default ;)
> 

All right, that'd be nice, just drop this patch.

thanks,
liubo

> Thanks for taking these patches up again!
> 
> -chris
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-11-01 14:05         ` Itaru Kitayama
@ 2012-11-01 16:01           ` Liu Bo
       [not found]             ` <CANW9uyt9qE9384WnQq5ggZ2hb-DbahZe8KY5-WXRFSKTiedekg@mail.gmail.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Liu Bo @ 2012-11-01 16:01 UTC (permalink / raw)
  To: Itaru Kitayama; +Cc: linux-btrfs

On 11/01/2012 10:05 PM, Itaru Kitayama wrote:
> Hi Liubo:
> 
> The V4 leaves only warnings from btrfs_destroy_inode(). So, you think
> it's normal
> an "old" extent recorded can be removed from the extent tree by the time
> relink_file_extents() invoked?
> 

Yeah, it could be if only we run delayed refs in time.

But I don't think that often happens since we run delayed refs when the amount
reaches its limit(64).

thanks,
liubo

> Itaru
> 
> On Thu, Nov 1, 2012 at 8:21 PM, Liu Bo <bo.li.liu@oracle.com> wrote:
> 
>> The current btrfs-next HEAD actually have included this v4 patch, so
>> just pull btrfs-next and give it a shot :)
>>
>> thanks,
>> liubo
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
       [not found]             ` <CANW9uyt9qE9384WnQq5ggZ2hb-DbahZe8KY5-WXRFSKTiedekg@mail.gmail.com>
@ 2012-11-26 10:30               ` Liu Bo
  0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2012-11-26 10:30 UTC (permalink / raw)
  To: Itaru Kitayama; +Cc: linux-btrfs

On Sun, Nov 25, 2012 at 12:48:33PM +0900, Itaru Kitayama wrote:
> Hi Liubo:
> 
> At the relinking stage, in some situations, looking up the extent tree with
> the extent logical start objectid key from one of the old extents fails,
> but the found previous item's objectid + offset is exactly the original
> search objectid. As you say, usually old extents are retrieved from the
> extent tree without a problem, but if the Delayed or some other mechanisms
> change extents layout in the finish ordered IO function, we need to take
> care of those rejected extents, not simply returning ENOENT. What do you
> think?

Hi Itaru san,

Thanks for the suggestion.

It's all right to ignore ENOENT.

An extent stands for a range of space, which owns an item in extent tree.

If old extents' item is not found in the extent tree, i.e. rejected extents,
it means that this range of space is not shared by any snapshots(the extent refs is now 0),
and so it's safe.

So the worst case is that due to COW, a file and its snapshots share nothing
after some time, the defrag on the file won't change its snapshots, but this
can be solved when we always with autodefrag and snapshot-aware defrag.

thanks,
liubo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-11-01 11:21       ` Liu Bo
  2012-11-01 14:05         ` Itaru Kitayama
@ 2012-12-12 19:37         ` Mitch Harder
  2012-12-13  1:28           ` Liu Bo
  1 sibling, 1 reply; 20+ messages in thread
From: Mitch Harder @ 2012-12-12 19:37 UTC (permalink / raw)
  To: Itaru Kitayama, linux-btrfs, Liu Bo

On Thu, Nov 1, 2012 at 6:21 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> On Thu, Nov 01, 2012 at 08:08:52PM +0900, Itaru Kitayama wrote:
>> Hi Liubo,
>>
>> I couldn't apply your V4 patch against the btrfs-next HEAD. Do you have
>> a github branch which I can checkout?
>>
>
> The current btrfs-next HEAD actually have included this v4 patch, so
> just pull btrfs-next and give it a shot :)
>

I'm still seeing similar issues using Josef's current btrfs-next
branch (which still includes the v4 version of the snapshot-aware
defrag patches).

[44507.850693] ------------[ cut here ]------------
[44507.850728] WARNING: at fs/btrfs/inode.c:7755
btrfs_destroy_inode+0x231/0x2c4 [btrfs]()
[44507.850732] Hardware name: OptiPlex 745
[44507.850735] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[44507.850753] Pid: 15719, comm: umount Tainted: G        W
3.7.0-btrfs-next+ #1
[44507.850756] Call Trace:
[44507.850766]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
[44507.850770]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
[44507.850787]  [<ffffffffa0041e0e>] btrfs_destroy_inode+0x231/0x2c4 [btrfs]
[44507.850793]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
[44507.850797]  [<ffffffff811417b5>] evict+0x122/0x1ac
[44507.850800]  [<ffffffff81142016>] iput+0xed/0x169
[44507.850816]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
[44507.850831]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
[44507.850845]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
[44507.850850]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
[44507.850861]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
[44507.850866]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
[44507.850869]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
[44507.850880]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
[44507.850884]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
[44507.850887]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
[44507.850892]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
[44507.850896]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
[44507.850901]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
[44507.850905] ---[ end trace ba14fbf3de68a237 ]---
[44507.850907] ------------[ cut here ]------------
[44507.850924] WARNING: at fs/btrfs/inode.c:7756
btrfs_destroy_inode+0x2b9/0x2c4 [btrfs]()
[44507.850927] Hardware name: OptiPlex 745
[44507.850930] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[44507.850947] Pid: 15719, comm: umount Tainted: G        W
3.7.0-btrfs-next+ #1
[44507.850949] Call Trace:
[44507.850956]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
[44507.850961]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
[44507.850978]  [<ffffffffa0041e96>] btrfs_destroy_inode+0x2b9/0x2c4 [btrfs]
[44507.850982]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
[44507.850986]  [<ffffffff811417b5>] evict+0x122/0x1ac
[44507.850990]  [<ffffffff81142016>] iput+0xed/0x169
[44507.851003]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
[44507.851033]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
[44507.851048]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
[44507.851052]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
[44507.851063]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
[44507.851066]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
[44507.851070]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
[44507.851080]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
[44507.851084]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
[44507.851087]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
[44507.851091]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
[44507.851095]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
[44507.851099]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
[44507.851101] ---[ end trace ba14fbf3de68a238 ]---
[44507.851104] ------------[ cut here ]------------
[44507.851121] WARNING: at fs/btrfs/inode.c:7758
btrfs_destroy_inode+0x28d/0x2c4 [btrfs]()
[44507.851123] Hardware name: OptiPlex 745
[44507.851124] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[44507.851140] Pid: 15719, comm: umount Tainted: G        W
3.7.0-btrfs-next+ #1
[44507.851142] Call Trace:
[44507.851148]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
[44507.851152]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
[44507.851168]  [<ffffffffa0041e6a>] btrfs_destroy_inode+0x28d/0x2c4 [btrfs]
[44507.851172]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
[44507.851176]  [<ffffffff811417b5>] evict+0x122/0x1ac
[44507.851180]  [<ffffffff81142016>] iput+0xed/0x169
[44507.851195]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
[44507.851209]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
[44507.851223]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
[44507.851227]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
[44507.851237]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
[44507.851241]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
[44507.851245]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
[44507.851255]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
[44507.851259]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
[44507.851263]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
[44507.851266]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
[44507.851270]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
[44507.851274]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
[44507.851277] ---[ end trace ba14fbf3de68a239 ]---
[44507.969039] ------------[ cut here ]------------
[44507.969069] WARNING: at fs/btrfs/extent-tree.c:4351
btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
[44507.969074] Hardware name: OptiPlex 745
[44507.969078] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[44507.969104] Pid: 15719, comm: umount Tainted: G        W
3.7.0-btrfs-next+ #1
[44507.969107] Call Trace:
[44507.969119]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
[44507.969125]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
[44507.969144]  [<ffffffffa0022ec0>] btrfs_free_block_groups+0x2ad/0x34c [btrfs]
[44507.969166]  [<ffffffffa002f188>] close_ctree+0x1c0/0x300 [btrfs]
[44507.969173]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
[44507.969189]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
[44507.969195]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
[44507.969201]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
[44507.969215]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
[44507.969221]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
[44507.969226]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
[44507.969232]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
[44507.969237]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
[44507.969245]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
[44507.969249] ---[ end trace ba14fbf3de68a23a ]---
[44507.969252] ------------[ cut here ]------------
[44507.969271] WARNING: at fs/btrfs/extent-tree.c:4352
btrfs_free_block_groups+0x347/0x34c [btrfs]()
[44507.969275] Hardware name: OptiPlex 745
[44507.969279] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
btrfs libcrc32c
[44507.969302] Pid: 15719, comm: umount Tainted: G        W
3.7.0-btrfs-next+ #1
[44507.969306] Call Trace:
[44507.969315]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
[44507.969322]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
[44507.969342]  [<ffffffffa0022f5a>] btrfs_free_block_groups+0x347/0x34c [btrfs]
[44507.969362]  [<ffffffffa002f188>] close_ctree+0x1c0/0x300 [btrfs]
[44507.969368]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
[44507.969383]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
[44507.969389]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
[44507.969394]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
[44507.969408]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
[44507.969414]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
[44507.969419]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
[44507.969424]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
[44507.969429]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
[44507.969434]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
[44507.969439] ---[ end trace ba14fbf3de68a23b ]---

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2 v4] Btrfs: snapshot-aware defrag
  2012-12-12 19:37         ` Mitch Harder
@ 2012-12-13  1:28           ` Liu Bo
  0 siblings, 0 replies; 20+ messages in thread
From: Liu Bo @ 2012-12-13  1:28 UTC (permalink / raw)
  To: Mitch Harder; +Cc: Itaru Kitayama, linux-btrfs

On Wed, Dec 12, 2012 at 01:37:21PM -0600, Mitch Harder wrote:
> On Thu, Nov 1, 2012 at 6:21 AM, Liu Bo <bo.li.liu@oracle.com> wrote:
> > On Thu, Nov 01, 2012 at 08:08:52PM +0900, Itaru Kitayama wrote:
> >> Hi Liubo,
> >>
> >> I couldn't apply your V4 patch against the btrfs-next HEAD. Do you have
> >> a github branch which I can checkout?
> >>
> >
> > The current btrfs-next HEAD actually have included this v4 patch, so
> > just pull btrfs-next and give it a shot :)
> >
> 
> I'm still seeing similar issues using Josef's current btrfs-next
> branch (which still includes the v4 version of the snapshot-aware
> defrag patches).

Well, it seems that there is something wrong about delalloc reservation.

thanks,
liubo

> 
> [44507.850693] ------------[ cut here ]------------
> [44507.850728] WARNING: at fs/btrfs/inode.c:7755
> btrfs_destroy_inode+0x231/0x2c4 [btrfs]()
> [44507.850732] Hardware name: OptiPlex 745
> [44507.850735] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [44507.850753] Pid: 15719, comm: umount Tainted: G        W
> 3.7.0-btrfs-next+ #1
> [44507.850756] Call Trace:
> [44507.850766]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
> [44507.850770]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
> [44507.850787]  [<ffffffffa0041e0e>] btrfs_destroy_inode+0x231/0x2c4 [btrfs]
> [44507.850793]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
> [44507.850797]  [<ffffffff811417b5>] evict+0x122/0x1ac
> [44507.850800]  [<ffffffff81142016>] iput+0xed/0x169
> [44507.850816]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
> [44507.850831]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
> [44507.850845]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
> [44507.850850]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
> [44507.850861]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
> [44507.850866]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
> [44507.850869]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
> [44507.850880]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [44507.850884]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
> [44507.850887]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
> [44507.850892]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
> [44507.850896]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
> [44507.850901]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
> [44507.850905] ---[ end trace ba14fbf3de68a237 ]---
> [44507.850907] ------------[ cut here ]------------
> [44507.850924] WARNING: at fs/btrfs/inode.c:7756
> btrfs_destroy_inode+0x2b9/0x2c4 [btrfs]()
> [44507.850927] Hardware name: OptiPlex 745
> [44507.850930] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [44507.850947] Pid: 15719, comm: umount Tainted: G        W
> 3.7.0-btrfs-next+ #1
> [44507.850949] Call Trace:
> [44507.850956]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
> [44507.850961]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
> [44507.850978]  [<ffffffffa0041e96>] btrfs_destroy_inode+0x2b9/0x2c4 [btrfs]
> [44507.850982]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
> [44507.850986]  [<ffffffff811417b5>] evict+0x122/0x1ac
> [44507.850990]  [<ffffffff81142016>] iput+0xed/0x169
> [44507.851003]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
> [44507.851033]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
> [44507.851048]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
> [44507.851052]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
> [44507.851063]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
> [44507.851066]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
> [44507.851070]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
> [44507.851080]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [44507.851084]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
> [44507.851087]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
> [44507.851091]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
> [44507.851095]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
> [44507.851099]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
> [44507.851101] ---[ end trace ba14fbf3de68a238 ]---
> [44507.851104] ------------[ cut here ]------------
> [44507.851121] WARNING: at fs/btrfs/inode.c:7758
> btrfs_destroy_inode+0x28d/0x2c4 [btrfs]()
> [44507.851123] Hardware name: OptiPlex 745
> [44507.851124] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [44507.851140] Pid: 15719, comm: umount Tainted: G        W
> 3.7.0-btrfs-next+ #1
> [44507.851142] Call Trace:
> [44507.851148]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
> [44507.851152]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
> [44507.851168]  [<ffffffffa0041e6a>] btrfs_destroy_inode+0x28d/0x2c4 [btrfs]
> [44507.851172]  [<ffffffff81141670>] destroy_inode+0x3c/0x5f
> [44507.851176]  [<ffffffff811417b5>] evict+0x122/0x1ac
> [44507.851180]  [<ffffffff81142016>] iput+0xed/0x169
> [44507.851195]  [<ffffffffa0038c18>] btrfs_run_delayed_iputs+0xd6/0xf6 [btrfs]
> [44507.851209]  [<ffffffffa002db75>] btrfs_commit_super+0x2c/0xfd [btrfs]
> [44507.851223]  [<ffffffffa002f289>] close_ctree+0x2c1/0x300 [btrfs]
> [44507.851227]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
> [44507.851237]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
> [44507.851241]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
> [44507.851245]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
> [44507.851255]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [44507.851259]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
> [44507.851263]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
> [44507.851266]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
> [44507.851270]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
> [44507.851274]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
> [44507.851277] ---[ end trace ba14fbf3de68a239 ]---
> [44507.969039] ------------[ cut here ]------------
> [44507.969069] WARNING: at fs/btrfs/extent-tree.c:4351
> btrfs_free_block_groups+0x2ad/0x34c [btrfs]()
> [44507.969074] Hardware name: OptiPlex 745
> [44507.969078] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [44507.969104] Pid: 15719, comm: umount Tainted: G        W
> 3.7.0-btrfs-next+ #1
> [44507.969107] Call Trace:
> [44507.969119]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
> [44507.969125]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
> [44507.969144]  [<ffffffffa0022ec0>] btrfs_free_block_groups+0x2ad/0x34c [btrfs]
> [44507.969166]  [<ffffffffa002f188>] close_ctree+0x1c0/0x300 [btrfs]
> [44507.969173]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
> [44507.969189]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
> [44507.969195]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
> [44507.969201]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
> [44507.969215]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [44507.969221]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
> [44507.969226]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
> [44507.969232]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
> [44507.969237]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
> [44507.969245]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
> [44507.969249] ---[ end trace ba14fbf3de68a23a ]---
> [44507.969252] ------------[ cut here ]------------
> [44507.969271] WARNING: at fs/btrfs/extent-tree.c:4352
> btrfs_free_block_groups+0x347/0x34c [btrfs]()
> [44507.969275] Hardware name: OptiPlex 745
> [44507.969279] Modules linked in: iTCO_wdt iTCO_vendor_support lpc_ich
> mfd_core lrw xts gf128mul ablk_helper cryptd aes_x86_64 sha256_generic
> btrfs libcrc32c
> [44507.969302] Pid: 15719, comm: umount Tainted: G        W
> 3.7.0-btrfs-next+ #1
> [44507.969306] Call Trace:
> [44507.969315]  [<ffffffff810364da>] warn_slowpath_common+0x74/0xa2
> [44507.969322]  [<ffffffff81036522>] warn_slowpath_null+0x1a/0x1c
> [44507.969342]  [<ffffffffa0022f5a>] btrfs_free_block_groups+0x347/0x34c [btrfs]
> [44507.969362]  [<ffffffffa002f188>] close_ctree+0x1c0/0x300 [btrfs]
> [44507.969368]  [<ffffffff811424c9>] ? evict_inodes+0x106/0x115
> [44507.969383]  [<ffffffffa00070b1>] btrfs_put_super+0x19/0x1b [btrfs]
> [44507.969389]  [<ffffffff8112b321>] generic_shutdown_super+0x5b/0xdc
> [44507.969394]  [<ffffffff8112b424>] kill_anon_super+0x16/0x24
> [44507.969408]  [<ffffffffa000ad98>] btrfs_kill_super+0x1a/0x8f [btrfs]
> [44507.969414]  [<ffffffff8112b647>] deactivate_locked_super+0x33/0x6c
> [44507.969419]  [<ffffffff8112c25f>] deactivate_super+0x4e/0x66
> [44507.969424]  [<ffffffff81145e64>] mntput_no_expire+0xf7/0x14d
> [44507.969429]  [<ffffffff81146ced>] sys_umount+0x63/0x37a
> [44507.969434]  [<ffffffff8183e642>] system_call_fastpath+0x16/0x1b
> [44507.969439] ---[ end trace ba14fbf3de68a23b ]---

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2012-12-13  1:30 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-27 10:28 [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Liu Bo
2012-10-27 10:28 ` [PATCH 2/2] Btrfs: make snapshot-aware defrag as a mount option Liu Bo
2012-10-30 23:31   ` David Sterba
2012-10-31  0:34     ` Liu Bo
2012-10-31  0:44       ` David Sterba
2012-10-31 13:31         ` Liu Bo
2012-11-01 14:43   ` Chris Mason
2012-11-01 15:49     ` Liu Bo
2012-10-29 20:06 ` [PATCH 1/2 v4] Btrfs: snapshot-aware defrag Mitch Harder
2012-10-30  1:20   ` Liu Bo
2012-10-30 20:59     ` Mitch Harder
2012-10-31 12:13 ` Itaru Kitayama
2012-10-31 12:55   ` Liu Bo
2012-11-01 11:08     ` Itaru Kitayama
2012-11-01 11:21       ` Liu Bo
2012-11-01 14:05         ` Itaru Kitayama
2012-11-01 16:01           ` Liu Bo
     [not found]             ` <CANW9uyt9qE9384WnQq5ggZ2hb-DbahZe8KY5-WXRFSKTiedekg@mail.gmail.com>
2012-11-26 10:30               ` Liu Bo
2012-12-12 19:37         ` Mitch Harder
2012-12-13  1:28           ` Liu Bo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.