All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
@ 2016-06-23 19:26 jeffm
  2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
  To: linux-btrfs

From: Jeff Mahoney <jeffm@suse.com>

While running xfstests generic/291, which creates a single file populated
with reflinks to the same extent, I found that fsck had been running for
hours.  perf top lead me to find_data_backref as the culprit, and a litte
more digging made it clear: For every extent record we add, we iterate
the entire list first.  My test case had ~2M records.  That math doesn't
go well.

This patchset converts the extent_backref list to an rbtree.  The test
that used to run for more than 8 hours without completing now takes
less than 20 seconds.

-Jeff

---

Jeff Mahoney (3):
  btrfs-progs: check: add helpers for converting between structures
  btrfs-progs: check: supplement extent backref list with rbtree
  btrfs-progs: check: switch to iterating over the backref_tree

 cmds-check.c | 355 +++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 237 insertions(+), 118 deletions(-)

-- 
2.7.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures
  2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
@ 2016-06-23 19:26 ` jeffm
  2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
  To: linux-btrfs

From: Jeff Mahoney <jeffm@suse.com>

We either open code list_entry calls or outright cast between types.  The
compiler will do the right thing if we use static inlines to do
typesafe conversions.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 cmds-check.c | 100 +++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 66 insertions(+), 34 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index ec0bbfd..a202a9d 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -84,6 +84,12 @@ struct extent_backref {
 	unsigned int broken:1;
 };
 
+static inline struct extent_backref *
+to_extent_backref(struct list_head *entry)
+{
+	return list_entry(entry, struct extent_backref, list);
+}
+
 struct data_backref {
 	struct extent_backref node;
 	union {
@@ -99,6 +105,12 @@ struct data_backref {
 	u32 found_ref;
 };
 
+static inline struct data_backref *
+to_data_backref(struct extent_backref *back)
+{
+	return container_of(back, struct data_backref, node);
+}
+
 /*
  * Much like data_backref, just removed the undetermined members
  * and change it to use list_head.
@@ -122,6 +134,12 @@ struct tree_backref {
 	};
 };
 
+static inline struct tree_backref *
+to_tree_backref(struct extent_backref *back)
+{
+	return container_of(back, struct tree_backref, node);
+}
+
 /* Explicit initialization for extent_record::flag_block_full_backref */
 enum { FLAG_UNSET = 2 };
 
@@ -152,6 +170,12 @@ struct extent_record {
 	unsigned int wrong_chunk_type:1;
 };
 
+static inline struct extent_record *
+to_extent_record(struct list_head *entry)
+{
+	return container_of(entry, struct extent_record, list);
+}
+
 struct inode_backref {
 	struct list_head list;
 	unsigned int found_dir_item:1;
@@ -166,6 +190,12 @@ struct inode_backref {
 	char name[0];
 };
 
+static inline struct inode_backref *
+to_inode_backref(struct list_head *entry)
+{
+	return list_entry(entry, struct inode_backref, list);
+}
+
 struct root_item_record {
 	struct list_head list;
 	u64 objectid;
@@ -256,6 +286,12 @@ struct root_backref {
 	char name[0];
 };
 
+static inline struct root_backref *
+to_root_backref(struct list_head *entry)
+{
+	return list_entry(entry, struct root_backref, list);
+}
+
 struct root_record {
 	struct list_head backrefs;
 	struct cache_extent cache;
@@ -834,8 +870,7 @@ static void free_inode_rec(struct inode_record *rec)
 		return;
 
 	while (!list_empty(&rec->backrefs)) {
-		backref = list_entry(rec->backrefs.next,
-				     struct inode_backref, list);
+		backref = to_inode_backref(rec->backrefs.next);
 		list_del(&backref->list);
 		free(backref);
 	}
@@ -1979,7 +2014,7 @@ static int check_root_dir(struct inode_record *rec)
 		goto out;
 	if (list_empty(&rec->backrefs))
 		goto out;
-	backref = list_entry(rec->backrefs.next, struct inode_backref, list);
+	backref = to_inode_backref(rec->backrefs.next);
 	if (!backref->found_inode_ref)
 		goto out;
 	if (backref->index != 0 || backref->namelen != 2 ||
@@ -3116,8 +3151,7 @@ static void free_root_record(struct cache_extent *cache)
 
 	rec = container_of(cache, struct root_record, cache);
 	while (!list_empty(&rec->backrefs)) {
-		backref = list_entry(rec->backrefs.next,
-				     struct root_backref, list);
+		backref = to_root_backref(rec->backrefs.next);
 		list_del(&backref->list);
 		free(backref);
 	}
@@ -3751,14 +3785,14 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 	int err = 0;
 
 	while(cur != &rec->backrefs) {
-		back = list_entry(cur, struct extent_backref, list);
+		back = to_extent_backref(cur);
 		cur = cur->next;
 		if (!back->found_extent_tree) {
 			err = 1;
 			if (!print_errs)
 				goto out;
 			if (back->is_data) {
-				dback = (struct data_backref *)back;
+				dback = to_data_backref(back);
 				fprintf(stderr, "Backref %llu %s %llu"
 					" owner %llu offset %llu num_refs %lu"
 					" not found in extent tree\n",
@@ -3772,7 +3806,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 					(unsigned long long)dback->offset,
 					(unsigned long)dback->num_refs);
 			} else {
-				tback = (struct tree_backref *)back;
+				tback = to_tree_backref(back);
 				fprintf(stderr, "Backref %llu parent %llu"
 					" root %llu not found in extent tree\n",
 					(unsigned long long)rec->start,
@@ -3784,7 +3818,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 			err = 1;
 			if (!print_errs)
 				goto out;
-			tback = (struct tree_backref *)back;
+			tback = to_tree_backref(back);
 			fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
 				(unsigned long long)rec->start,
 				back->full_backref ? "parent" : "root",
@@ -3793,7 +3827,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 				(unsigned long long)tback->root, back);
 		}
 		if (back->is_data) {
-			dback = (struct data_backref *)back;
+			dback = to_data_backref(back);
 			if (dback->found_ref != dback->num_refs) {
 				err = 1;
 				if (!print_errs)
@@ -3837,7 +3871,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 		if (!back->is_data) {
 			found += 1;
 		} else {
-			dback = (struct data_backref *)back;
+			dback = to_data_backref(back);
 			found += dback->found_ref;
 		}
 	}
@@ -3861,7 +3895,7 @@ static int free_all_extent_backrefs(struct extent_record *rec)
 	struct list_head *cur;
 	while (!list_empty(&rec->backrefs)) {
 		cur = rec->backrefs.next;
-		back = list_entry(cur, struct extent_backref, list);
+		back = to_extent_backref(cur);
 		list_del(cur);
 		free(back);
 	}
@@ -3922,7 +3956,7 @@ static int check_owner_ref(struct btrfs_root *root,
 			continue;
 		if (node->full_backref)
 			continue;
-		back = (struct tree_backref *)node;
+		back = to_tree_backref(node);
 		if (btrfs_header_owner(buf) == back->root)
 			return 0;
 	}
@@ -3966,11 +4000,11 @@ static int is_extent_tree_record(struct extent_record *rec)
 	int is_extent = 0;
 
 	while(cur != &rec->backrefs) {
-		node = list_entry(cur, struct extent_backref, list);
+		node = to_extent_backref(cur);
 		cur = cur->next;
 		if (node->is_data)
 			return 0;
-		back = (struct tree_backref *)node;
+		back = to_tree_backref(node);
 		if (node->full_backref)
 			return 0;
 		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
@@ -4353,11 +4387,11 @@ static struct tree_backref *find_tree_backref(struct extent_record *rec,
 	struct tree_backref *back;
 
 	while(cur != &rec->backrefs) {
-		node = list_entry(cur, struct extent_backref, list);
+		node = to_extent_backref(cur);
 		cur = cur->next;
 		if (node->is_data)
 			continue;
-		back = (struct tree_backref *)node;
+		back = to_tree_backref(node);
 		if (parent > 0) {
 			if (!node->full_backref)
 				continue;
@@ -4404,11 +4438,11 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
 	struct data_backref *back;
 
 	while(cur != &rec->backrefs) {
-		node = list_entry(cur, struct extent_backref, list);
+		node = to_extent_backref(cur);
 		cur = cur->next;
 		if (!node->is_data)
 			continue;
-		back = (struct data_backref *)node;
+		back = to_data_backref(node);
 		if (parent > 0) {
 			if (!node->full_backref)
 				continue;
@@ -4494,8 +4528,7 @@ static void check_extent_type(struct extent_record *rec)
 		struct tree_backref *tback;
 		u64 bg_type;
 
-		node = list_entry(rec->backrefs.next, struct extent_backref,
-				  list);
+		node = to_extent_backref(rec->backrefs.next);
 		if (node->is_data) {
 			/* tree block shouldn't have data backref */
 			rec->wrong_chunk_type = 1;
@@ -6507,7 +6540,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
 		} else {
 			struct btrfs_disk_key copy_key;;
 
-			tback = (struct tree_backref *)back;
+			tback = to_tree_backref(back);
 			bi = (struct btrfs_tree_block_info *)(ei + 1);
 			memset_extent_buffer(leaf, 0, (unsigned long)bi,
 					     sizeof(*bi));
@@ -6536,7 +6569,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
 		u64 parent;
 		int i;
 
-		dback = (struct data_backref *)back;
+		dback = to_data_backref(back);
 		if (back->full_backref)
 			parent = dback->parent;
 		else
@@ -6574,7 +6607,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
 	} else {
 		u64 parent;
 
-		tback = (struct tree_backref *)back;
+		tback = to_tree_backref(back);
 		if (back->full_backref)
 			parent = tback->parent;
 		else
@@ -6823,7 +6856,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 		if (back->full_backref || !back->is_data)
 			continue;
 
-		dback = (struct data_backref *)back;
+		dback = to_data_backref(back);
 
 		/*
 		 * We only pay attention to backrefs that we found a real
@@ -6949,7 +6982,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 		if (back->full_backref || !back->is_data)
 			continue;
 
-		dback = (struct data_backref *)back;
+		dback = to_data_backref(back);
 
 		/*
 		 * Still ignoring backrefs that don't have a real ref attached
@@ -7013,7 +7046,7 @@ static int process_duplicates(struct btrfs_root *root,
 	 */
 	remove_cache_extent(extent_cache, &rec->cache);
 
-	good = list_entry(rec->dups.next, struct extent_record, list);
+	good = to_extent_record(rec->dups.next);
 	list_del_init(&good->list);
 	INIT_LIST_HEAD(&good->backrefs);
 	INIT_LIST_HEAD(&good->dups);
@@ -7147,7 +7180,7 @@ static int delete_duplicate_records(struct btrfs_root *root,
 		ret = err;
 out:
 	while (!list_empty(&delete_list)) {
-		tmp = list_entry(delete_list.next, struct extent_record, list);
+		tmp = to_extent_record(delete_list.next);
 		list_del_init(&tmp->list);
 		if (tmp == rec)
 			continue;
@@ -7155,7 +7188,7 @@ out:
 	}
 
 	while (!list_empty(&rec->dups)) {
-		tmp = list_entry(rec->dups.next, struct extent_record, list);
+		tmp = to_extent_record(rec->dups.next);
 		list_del_init(&tmp->list);
 		free(tmp);
 	}
@@ -7187,7 +7220,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
 		if (back->full_backref || !back->is_data)
 			continue;
 
-		dback = (struct data_backref *)back;
+		dback = to_data_backref(back);
 
 		/* We found this one, we don't need to do a lookup */
 		if (dback->found_ref)
@@ -7286,7 +7319,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
 		if (back->full_backref || !back->is_data ||
 		    !back->found_extent_tree)
 			continue;
-		dback = (struct data_backref *)back;
+		dback = to_data_backref(back);
 		if (dback->found_ref)
 			continue;
 		key.objectid = dback->root;
@@ -7403,7 +7436,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
 
 	/* step three, recreate all the refs we did find */
 	while(cur != &rec->backrefs) {
-		back = list_entry(cur, struct extent_backref, list);
+		back = to_extent_backref(cur);
 		cur = cur->next;
 
 		/*
@@ -7660,8 +7693,7 @@ static int check_extent_refs(struct btrfs_root *root,
 	 * belong to a different extent item and not the weird duplicate one.
 	 */
 	while (repair && !list_empty(&duplicate_extents)) {
-		rec = list_entry(duplicate_extents.next, struct extent_record,
-				 list);
+		rec = to_extent_record(duplicate_extents.next);
 		list_del_init(&rec->list);
 
 		/* Sometimes we can find a backref before we find an actual
-- 
2.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree
  2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
  2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
@ 2016-06-23 19:26 ` jeffm
  2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
  To: linux-btrfs

From: Jeff Mahoney <jeffm@suse.com>

For the pathlogical case, like xfstests generic/297 that creates a
large file consisting of one, repeating reflinked extent, fsck can
take hours.  The root cause is that calling find_data_backref while
iterating the extent records is an O(n^2) algorithm.  For my
example test run, n was 2*2^20 and fsck was at 8 hours and counting.

This patch supplements the list with an rbtree and drops the runtime
of that testcase to about 20 seconds.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 cmds-check.c | 199 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 149 insertions(+), 50 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index a202a9d..4785f00 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -77,6 +77,7 @@ static struct cache_tree *roots_info_cache = NULL;
 
 struct extent_backref {
 	struct list_head list;
+	struct rb_node node;
 	unsigned int is_data:1;
 	unsigned int found_extent_tree:1;
 	unsigned int full_backref:1;
@@ -90,6 +91,12 @@ to_extent_backref(struct list_head *entry)
 	return list_entry(entry, struct extent_backref, list);
 }
 
+static inline struct extent_backref *
+rb_node_to_extent_backref(struct rb_node *node)
+{
+	return rb_entry(node, struct extent_backref, node);
+}
+
 struct data_backref {
 	struct extent_backref node;
 	union {
@@ -111,6 +118,57 @@ to_data_backref(struct extent_backref *back)
 	return container_of(back, struct data_backref, node);
 }
 
+static int compare_data_backref(struct rb_node *node1,
+				struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct data_backref *back1 = to_data_backref(ext1);
+	struct data_backref *back2 = to_data_backref(ext2);
+
+	WARN_ON(!ext1->is_data);
+	WARN_ON(!ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (back1->parent > back2->parent)
+		return 1;
+	if (back1->parent < back2->parent)
+		return -1;
+
+	/* This is a full backref and the parents match. */
+	if (back1->node.full_backref)
+		return 0;
+
+	if (back1->owner > back2->owner)
+		return 1;
+	if (back1->owner < back2->owner)
+		return -1;
+
+	if (back1->offset > back2->offset)
+		return 1;
+	if (back1->offset < back2->offset)
+		return -1;
+
+	if (back1->bytes > back2->bytes)
+		return 1;
+	if (back1->bytes < back2->bytes)
+		return -1;
+
+	if (back1->found_ref && back2->found_ref) {
+		if (back1->disk_bytenr > back2->disk_bytenr)
+			return 1;
+		if (back1->disk_bytenr < back2->disk_bytenr)
+			return -1;
+
+		if (back1->found_ref > back2->found_ref)
+			return 1;
+		if (back1->found_ref < back2->found_ref)
+			return -1;
+	}
+
+	return 0;
+}
+
 /*
  * Much like data_backref, just removed the undetermined members
  * and change it to use list_head.
@@ -140,12 +198,56 @@ to_tree_backref(struct extent_backref *back)
 	return container_of(back, struct tree_backref, node);
 }
 
+static int compare_tree_backref(struct rb_node *node1,
+				struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct tree_backref *back1 = to_tree_backref(ext1);
+	struct tree_backref *back2 = to_tree_backref(ext2);
+
+	WARN_ON(ext1->is_data);
+	WARN_ON(ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (back1->parent > back2->parent)
+		return 1;
+	if (back1->parent < back2->parent)
+		return -1;
+
+	return 0;
+}
+
+static int compare_extent_backref(struct rb_node *node1,
+				  struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+
+	if (ext1->is_data > ext2->is_data)
+		return 1;
+
+	if (ext1->is_data < ext2->is_data)
+		return -1;
+
+	if (ext1->full_backref > ext2->full_backref)
+		return 1;
+	if (ext1->full_backref < ext2->full_backref)
+		return -1;
+
+	if (ext1->is_data)
+		return compare_data_backref(node1, node2);
+	else
+		return compare_tree_backref(node1, node2);
+}
+
 /* Explicit initialization for extent_record::flag_block_full_backref */
 enum { FLAG_UNSET = 2 };
 
 struct extent_record {
 	struct list_head backrefs;
 	struct list_head dups;
+	struct rb_root backref_tree;
 	struct list_head list;
 	struct cache_extent cache;
 	struct btrfs_disk_key parent_key;
@@ -4379,32 +4481,30 @@ static int check_block(struct btrfs_root *root,
 	return ret;
 }
 
+
 static struct tree_backref *find_tree_backref(struct extent_record *rec,
 						u64 parent, u64 root)
 {
-	struct list_head *cur = rec->backrefs.next;
-	struct extent_backref *node;
-	struct tree_backref *back;
+	struct rb_node *node;
+	struct tree_backref *back = NULL;
+	struct tree_backref match = {
+		.node = {
+			.is_data = 0,
+		},
+	};
 
-	while(cur != &rec->backrefs) {
-		node = to_extent_backref(cur);
-		cur = cur->next;
-		if (node->is_data)
-			continue;
-		back = to_tree_backref(node);
-		if (parent > 0) {
-			if (!node->full_backref)
-				continue;
-			if (parent == back->parent)
-				return back;
-		} else {
-			if (node->full_backref)
-				continue;
-			if (back->root == root)
-				return back;
-		}
-	}
-	return NULL;
+	if (parent) {
+		match.parent = parent;
+		match.node.full_backref = 1;
+	} else
+		match.root = root;
+
+	node = rb_search(&rec->backref_tree, &match.node.node,
+			 (rb_compare_keys)compare_extent_backref, NULL);
+	if (node)
+		back = to_tree_backref(rb_node_to_extent_backref(node));
+
+	return back;
 }
 
 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
@@ -4423,6 +4523,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 		ref->node.full_backref = 0;
 	}
 	list_add_tail(&ref->node.list, &rec->backrefs);
+	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
 
 	return ref;
 }
@@ -4433,35 +4534,31 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
 						int found_ref,
 						u64 disk_bytenr, u64 bytes)
 {
-	struct list_head *cur = rec->backrefs.next;
-	struct extent_backref *node;
-	struct data_backref *back;
+	struct rb_node *node;
+	struct data_backref *back = NULL;
+	struct data_backref match = {
+		.node = {
+			.is_data = 1,
+		},
+		.owner = owner,
+		.offset = offset,
+		.bytes = bytes,
+		.found_ref = found_ref,
+		.disk_bytenr = disk_bytenr,
+	};
 
-	while(cur != &rec->backrefs) {
-		node = to_extent_backref(cur);
-		cur = cur->next;
-		if (!node->is_data)
-			continue;
-		back = to_data_backref(node);
-		if (parent > 0) {
-			if (!node->full_backref)
-				continue;
-			if (parent == back->parent)
-				return back;
-		} else {
-			if (node->full_backref)
-				continue;
-			if (back->root == root && back->owner == owner &&
-			    back->offset == offset) {
-				if (found_ref && node->found_ref &&
-				    (back->bytes != bytes ||
-				    back->disk_bytenr != disk_bytenr))
-					continue;
-				return back;
-			}
-		}
-	}
-	return NULL;
+	if (parent) {
+		match.parent = parent;
+		match.node.full_backref = 1;
+	} else
+		match.root = root;
+
+	node = rb_search(&rec->backref_tree, &match.node.node,
+			 (rb_compare_keys)compare_extent_backref, NULL);
+	if (node)
+		back = to_data_backref(rb_node_to_extent_backref(node));
+
+	return back;
 }
 
 static struct data_backref *alloc_data_backref(struct extent_record *rec,
@@ -4491,6 +4588,7 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
 	ref->found_ref = 0;
 	ref->num_refs = 0;
 	list_add_tail(&ref->node.list, &rec->backrefs);
+	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
 	if (max_size > rec->max_size)
 		rec->max_size = max_size;
 	return ref;
@@ -4578,6 +4676,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
 	INIT_LIST_HEAD(&rec->backrefs);
 	INIT_LIST_HEAD(&rec->dups);
 	INIT_LIST_HEAD(&rec->list);
+	rec->backref_tree = RB_ROOT;
 	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
 	rec->cache.start = tmpl->start;
 	rec->cache.size = tmpl->nr;
-- 
2.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree
  2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
  2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
  2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
@ 2016-06-23 19:26 ` jeffm
  2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
  2016-07-04 12:20 ` David Sterba
  4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
  To: linux-btrfs

From: Jeff Mahoney <jeffm@suse.com>

We now have two data structures that can be used to iterate the same data
set, and there may be quite a few of them in memory.  Eliminating the
list_head member will reduce memory consumption while iterating over
the extent backrefs.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 cmds-check.c | 88 ++++++++++++++++++++++++++----------------------------------
 1 file changed, 38 insertions(+), 50 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 4785f00..6434857 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -76,7 +76,6 @@ static struct task_ctx ctx = { 0 };
 static struct cache_tree *roots_info_cache = NULL;
 
 struct extent_backref {
-	struct list_head list;
 	struct rb_node node;
 	unsigned int is_data:1;
 	unsigned int found_extent_tree:1;
@@ -86,12 +85,6 @@ struct extent_backref {
 };
 
 static inline struct extent_backref *
-to_extent_backref(struct list_head *entry)
-{
-	return list_entry(entry, struct extent_backref, list);
-}
-
-static inline struct extent_backref *
 rb_node_to_extent_backref(struct rb_node *node)
 {
 	return rb_entry(node, struct extent_backref, node);
@@ -3879,16 +3872,15 @@ out:
 
 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 {
-	struct list_head *cur = rec->backrefs.next;
+	struct rb_node *n;
 	struct extent_backref *back;
 	struct tree_backref *tback;
 	struct data_backref *dback;
 	u64 found = 0;
 	int err = 0;
 
-	while(cur != &rec->backrefs) {
-		back = to_extent_backref(cur);
-		cur = cur->next;
+	for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
+		back = rb_node_to_extent_backref(n);
 		if (!back->found_extent_tree) {
 			err = 1;
 			if (!print_errs)
@@ -3991,17 +3983,15 @@ out:
 	return err;
 }
 
-static int free_all_extent_backrefs(struct extent_record *rec)
+static void __free_one_backref(struct rb_node *node)
 {
-	struct extent_backref *back;
-	struct list_head *cur;
-	while (!list_empty(&rec->backrefs)) {
-		cur = rec->backrefs.next;
-		back = to_extent_backref(cur);
-		list_del(cur);
-		free(back);
-	}
-	return 0;
+	struct extent_backref *back = rb_node_to_extent_backref(node);
+	free(back);
+}
+
+static void free_all_extent_backrefs(struct extent_record *rec)
+{
+	rb_free_nodes(&rec->backref_tree, __free_one_backref);
 }
 
 static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
@@ -4041,7 +4031,7 @@ static int check_owner_ref(struct btrfs_root *root,
 			    struct extent_record *rec,
 			    struct extent_buffer *buf)
 {
-	struct extent_backref *node;
+	struct extent_backref *node, *tmp;
 	struct tree_backref *back;
 	struct btrfs_root *ref_root;
 	struct btrfs_key key;
@@ -4051,7 +4041,8 @@ static int check_owner_ref(struct btrfs_root *root,
 	int found = 0;
 	int ret;
 
-	list_for_each_entry(node, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(node, tmp,
+					     &rec->backref_tree, node) {
 		if (node->is_data)
 			continue;
 		if (!node->found_ref)
@@ -4096,18 +4087,16 @@ static int check_owner_ref(struct btrfs_root *root,
 
 static int is_extent_tree_record(struct extent_record *rec)
 {
-	struct list_head *cur = rec->backrefs.next;
-	struct extent_backref *node;
+	struct extent_backref *ref, *tmp;
 	struct tree_backref *back;
 	int is_extent = 0;
 
-	while(cur != &rec->backrefs) {
-		node = to_extent_backref(cur);
-		cur = cur->next;
-		if (node->is_data)
+	rbtree_postorder_for_each_entry_safe(ref, tmp,
+					     &rec->backref_tree, node) {
+		if (ref->is_data)
 			return 0;
-		back = to_tree_backref(node);
-		if (node->full_backref)
+		back = to_tree_backref(ref);
+		if (ref->full_backref)
 			return 0;
 		if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
 			is_extent = 1;
@@ -4522,7 +4511,6 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 		ref->root = root;
 		ref->node.full_backref = 0;
 	}
-	list_add_tail(&ref->node.list, &rec->backrefs);
 	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
 
 	return ref;
@@ -4587,7 +4575,6 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
 	ref->bytes = max_size;
 	ref->found_ref = 0;
 	ref->num_refs = 0;
-	list_add_tail(&ref->node.list, &rec->backrefs);
 	rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
 	if (max_size > rec->max_size)
 		rec->max_size = max_size;
@@ -4621,12 +4608,12 @@ static void check_extent_type(struct extent_record *rec)
 	 * Check SYSTEM extent, as it's also marked as metadata, we can only
 	 * make sure it's a SYSTEM extent by its backref
 	 */
-	if (!list_empty(&rec->backrefs)) {
+	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
 		struct extent_backref *node;
 		struct tree_backref *tback;
 		u64 bg_type;
 
-		node = to_extent_backref(rec->backrefs.next);
+		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
 		if (node->is_data) {
 			/* tree block shouldn't have data backref */
 			rec->wrong_chunk_type = 1;
@@ -6479,7 +6466,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
 			back->node.found_extent_tree = 0;
 
 		if (!back->node.found_extent_tree && back->node.found_ref) {
-			list_del(&back->node.list);
+			rb_erase(&back->node.node, &rec->backref_tree);
 			free(back);
 		}
 	} else {
@@ -6498,7 +6485,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
 			back->node.found_extent_tree = 0;
 		}
 		if (!back->node.found_extent_tree && back->node.found_ref) {
-			list_del(&back->node.list);
+			rb_erase(&back->node.node, &rec->backref_tree);
 			free(back);
 		}
 	}
@@ -6935,7 +6922,7 @@ out:
 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 			   struct extent_record *rec)
 {
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct extent_entry *entry, *best = NULL;
 	LIST_HEAD(entries);
@@ -6951,7 +6938,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 	if (rec->metadata)
 		return 0;
 
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data)
 			continue;
 
@@ -7077,7 +7065,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 	 * Ok great we all agreed on an extent record, let's go find the real
 	 * references and fix up the ones that don't match.
 	 */
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data)
 			continue;
 
@@ -7306,7 +7295,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
 				  struct extent_record *rec)
 {
 	struct btrfs_root *root;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct cache_extent *cache;
 	struct btrfs_file_extent_item *fi;
@@ -7314,7 +7303,8 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
 	u64 bytenr, bytes;
 	int ret;
 
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		/* Don't care about full backrefs (poor unloved backrefs) */
 		if (back->full_backref || !back->is_data)
 			continue;
@@ -7402,7 +7392,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_key key;
 	struct btrfs_root *dest_root;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct orphan_data_extent *orphan;
 	struct btrfs_path *path;
@@ -7414,7 +7404,8 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data ||
 		    !back->found_extent_tree)
 			continue;
@@ -7481,9 +7472,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
 	struct btrfs_trans_handle *trans = NULL;
 	int ret;
 	struct btrfs_path *path;
-	struct list_head *cur = rec->backrefs.next;
 	struct cache_extent *cache;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	int allocated = 0;
 	u64 flags = 0;
 
@@ -7534,10 +7524,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
 	}
 
 	/* step three, recreate all the refs we did find */
-	while(cur != &rec->backrefs) {
-		back = to_extent_backref(cur);
-		cur = cur->next;
-
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		/*
 		 * if we didn't find any references, don't create a
 		 * new extent record
-- 
2.7.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
  2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
                   ` (2 preceding siblings ...)
  2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
@ 2016-06-23 21:24 ` Holger Hoffstätte
  2016-06-23 21:38   ` Jeff Mahoney
  2016-07-04 12:20 ` David Sterba
  4 siblings, 1 reply; 7+ messages in thread
From: Holger Hoffstätte @ 2016-06-23 21:24 UTC (permalink / raw)
  To: jeffm, linux-btrfs

On 06/23/16 21:26, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> While running xfstests generic/291, which creates a single file populated
> with reflinks to the same extent, I found that fsck had been running for
> hours.  perf top lead me to find_data_backref as the culprit, and a litte
> more digging made it clear: For every extent record we add, we iterate
> the entire list first.  My test case had ~2M records.  That math doesn't
> go well.
> 
> This patchset converts the extent_backref list to an rbtree.  The test
> that used to run for more than 8 hours without completing now takes
> less than 20 seconds.

Very interesting. Time for science!

unpatched:

holger>time btrfs check /dev/sdc1 
Checking filesystem on /dev/sdc1
..
btrfs check /dev/sdc1  17.03s user 3.79s system 25% cpu 1:22.82 total

patched:

holger>time btrfs check /dev/sdc1
Checking filesystem on /dev/sdc1
..
btrfs check /dev/sdc1  17.03s user 3.74s system 24% cpu 1:23.24 total

This is a 1TB disk with ~850GB data in 4 subvolumes, ~2 snapshots each.
I guess it only starts to matter (relative to the necessary I/O cost per
extent) when the level of sharing is higher, i.e. many more snapshots?

OTOH it doesn't explode, so that's good. :)

cheers
Holger


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
  2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
@ 2016-06-23 21:38   ` Jeff Mahoney
  0 siblings, 0 replies; 7+ messages in thread
From: Jeff Mahoney @ 2016-06-23 21:38 UTC (permalink / raw)
  To: Holger Hoffstätte, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 2010 bytes --]

On 6/23/16 5:24 PM, Holger Hoffstätte wrote:
> On 06/23/16 21:26, jeffm@suse.com wrote:
>> From: Jeff Mahoney <jeffm@suse.com>

Hi Holger -

>> While running xfstests generic/291, which creates a single file populated

Whoops, this should've been generic/297.

>> with reflinks to the same extent, I found that fsck had been running for
>> hours.  perf top lead me to find_data_backref as the culprit, and a litte
>> more digging made it clear: For every extent record we add, we iterate
>> the entire list first.  My test case had ~2M records.  That math doesn't
>> go well.
>>
>> This patchset converts the extent_backref list to an rbtree.  The test
>> that used to run for more than 8 hours without completing now takes
>> less than 20 seconds.
> 
> Very interesting. Time for science!
> 
> unpatched:
> 
> holger>time btrfs check /dev/sdc1 
> Checking filesystem on /dev/sdc1
> ..
> btrfs check /dev/sdc1  17.03s user 3.79s system 25% cpu 1:22.82 total
> 
> patched:
> 
> holger>time btrfs check /dev/sdc1
> Checking filesystem on /dev/sdc1
> ..
> btrfs check /dev/sdc1  17.03s user 3.74s system 24% cpu 1:23.24 total
> 
> This is a 1TB disk with ~850GB data in 4 subvolumes, ~2 snapshots each.
> I guess it only starts to matter (relative to the necessary I/O cost per
> extent) when the level of sharing is higher, i.e. many more snapshots?

Thanks for testing.  This is exactly the result I wanted to see -- that
the impact on the regular case is minimal.  Once the number of reflinks
to a single extent rises significantly, the improvement is clear.  My
test case was an 8 GB file with every block referencing the first block
of the file.  You're welcome to try that test case as well, but I will
warn you *not* to run filefrag on it or you'll either have to wait a
really long time for it to complete or reboot your system.

> OTOH it doesn't explode, so that's good. :)

Hey, I'll take that feedback. :)

-Jeff

-- 
Jeff Mahoney
SUSE Labs


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 881 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
  2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
                   ` (3 preceding siblings ...)
  2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
@ 2016-07-04 12:20 ` David Sterba
  4 siblings, 0 replies; 7+ messages in thread
From: David Sterba @ 2016-07-04 12:20 UTC (permalink / raw)
  To: jeffm; +Cc: linux-btrfs

On Thu, Jun 23, 2016 at 03:26:03PM -0400, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> While running xfstests generic/291, which creates a single file populated
> with reflinks to the same extent, I found that fsck had been running for
> hours.  perf top lead me to find_data_backref as the culprit, and a litte
> more digging made it clear: For every extent record we add, we iterate
> the entire list first.  My test case had ~2M records.  That math doesn't
> go well.
> 
> This patchset converts the extent_backref list to an rbtree.  The test
> that used to run for more than 8 hours without completing now takes
> less than 20 seconds.
> 
> -Jeff
> 
> ---
> 
> Jeff Mahoney (3):
>   btrfs-progs: check: add helpers for converting between structures
>   btrfs-progs: check: supplement extent backref list with rbtree
>   btrfs-progs: check: switch to iterating over the backref_tree

Applied (with some trivial fixups), thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-07-04 12:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
2016-06-23 21:38   ` Jeff Mahoney
2016-07-04 12:20 ` David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.