* [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
@ 2016-06-23 19:26 jeffm
2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
` (4 more replies)
0 siblings, 5 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
To: linux-btrfs
From: Jeff Mahoney <jeffm@suse.com>
While running xfstests generic/291, which creates a single file populated
with reflinks to the same extent, I found that fsck had been running for
hours. perf top lead me to find_data_backref as the culprit, and a litte
more digging made it clear: For every extent record we add, we iterate
the entire list first. My test case had ~2M records. That math doesn't
go well.
This patchset converts the extent_backref list to an rbtree. The test
that used to run for more than 8 hours without completing now takes
less than 20 seconds.
-Jeff
---
Jeff Mahoney (3):
btrfs-progs: check: add helpers for converting between structures
btrfs-progs: check: supplement extent backref list with rbtree
btrfs-progs: check: switch to iterating over the backref_tree
cmds-check.c | 355 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 237 insertions(+), 118 deletions(-)
--
2.7.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
@ 2016-06-23 19:26 ` jeffm
2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
To: linux-btrfs
From: Jeff Mahoney <jeffm@suse.com>
We either open code list_entry calls or outright cast between types. The
compiler will do the right thing if we use static inlines to do
typesafe conversions.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
cmds-check.c | 100 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 66 insertions(+), 34 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index ec0bbfd..a202a9d 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -84,6 +84,12 @@ struct extent_backref {
unsigned int broken:1;
};
+static inline struct extent_backref *
+to_extent_backref(struct list_head *entry)
+{
+ return list_entry(entry, struct extent_backref, list);
+}
+
struct data_backref {
struct extent_backref node;
union {
@@ -99,6 +105,12 @@ struct data_backref {
u32 found_ref;
};
+static inline struct data_backref *
+to_data_backref(struct extent_backref *back)
+{
+ return container_of(back, struct data_backref, node);
+}
+
/*
* Much like data_backref, just removed the undetermined members
* and change it to use list_head.
@@ -122,6 +134,12 @@ struct tree_backref {
};
};
+static inline struct tree_backref *
+to_tree_backref(struct extent_backref *back)
+{
+ return container_of(back, struct tree_backref, node);
+}
+
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
@@ -152,6 +170,12 @@ struct extent_record {
unsigned int wrong_chunk_type:1;
};
+static inline struct extent_record *
+to_extent_record(struct list_head *entry)
+{
+ return container_of(entry, struct extent_record, list);
+}
+
struct inode_backref {
struct list_head list;
unsigned int found_dir_item:1;
@@ -166,6 +190,12 @@ struct inode_backref {
char name[0];
};
+static inline struct inode_backref *
+to_inode_backref(struct list_head *entry)
+{
+ return list_entry(entry, struct inode_backref, list);
+}
+
struct root_item_record {
struct list_head list;
u64 objectid;
@@ -256,6 +286,12 @@ struct root_backref {
char name[0];
};
+static inline struct root_backref *
+to_root_backref(struct list_head *entry)
+{
+ return list_entry(entry, struct root_backref, list);
+}
+
struct root_record {
struct list_head backrefs;
struct cache_extent cache;
@@ -834,8 +870,7 @@ static void free_inode_rec(struct inode_record *rec)
return;
while (!list_empty(&rec->backrefs)) {
- backref = list_entry(rec->backrefs.next,
- struct inode_backref, list);
+ backref = to_inode_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
@@ -1979,7 +2014,7 @@ static int check_root_dir(struct inode_record *rec)
goto out;
if (list_empty(&rec->backrefs))
goto out;
- backref = list_entry(rec->backrefs.next, struct inode_backref, list);
+ backref = to_inode_backref(rec->backrefs.next);
if (!backref->found_inode_ref)
goto out;
if (backref->index != 0 || backref->namelen != 2 ||
@@ -3116,8 +3151,7 @@ static void free_root_record(struct cache_extent *cache)
rec = container_of(cache, struct root_record, cache);
while (!list_empty(&rec->backrefs)) {
- backref = list_entry(rec->backrefs.next,
- struct root_backref, list);
+ backref = to_root_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
@@ -3751,14 +3785,14 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
int err = 0;
while(cur != &rec->backrefs) {
- back = list_entry(cur, struct extent_backref, list);
+ back = to_extent_backref(cur);
cur = cur->next;
if (!back->found_extent_tree) {
err = 1;
if (!print_errs)
goto out;
if (back->is_data) {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
fprintf(stderr, "Backref %llu %s %llu"
" owner %llu offset %llu num_refs %lu"
" not found in extent tree\n",
@@ -3772,7 +3806,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
(unsigned long long)dback->offset,
(unsigned long)dback->num_refs);
} else {
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
fprintf(stderr, "Backref %llu parent %llu"
" root %llu not found in extent tree\n",
(unsigned long long)rec->start,
@@ -3784,7 +3818,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
err = 1;
if (!print_errs)
goto out;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n",
(unsigned long long)rec->start,
back->full_backref ? "parent" : "root",
@@ -3793,7 +3827,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
(unsigned long long)tback->root, back);
}
if (back->is_data) {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (dback->found_ref != dback->num_refs) {
err = 1;
if (!print_errs)
@@ -3837,7 +3871,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
if (!back->is_data) {
found += 1;
} else {
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
found += dback->found_ref;
}
}
@@ -3861,7 +3895,7 @@ static int free_all_extent_backrefs(struct extent_record *rec)
struct list_head *cur;
while (!list_empty(&rec->backrefs)) {
cur = rec->backrefs.next;
- back = list_entry(cur, struct extent_backref, list);
+ back = to_extent_backref(cur);
list_del(cur);
free(back);
}
@@ -3922,7 +3956,7 @@ static int check_owner_ref(struct btrfs_root *root,
continue;
if (node->full_backref)
continue;
- back = (struct tree_backref *)node;
+ back = to_tree_backref(node);
if (btrfs_header_owner(buf) == back->root)
return 0;
}
@@ -3966,11 +4000,11 @@ static int is_extent_tree_record(struct extent_record *rec)
int is_extent = 0;
while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
+ node = to_extent_backref(cur);
cur = cur->next;
if (node->is_data)
return 0;
- back = (struct tree_backref *)node;
+ back = to_tree_backref(node);
if (node->full_backref)
return 0;
if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
@@ -4353,11 +4387,11 @@ static struct tree_backref *find_tree_backref(struct extent_record *rec,
struct tree_backref *back;
while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
+ node = to_extent_backref(cur);
cur = cur->next;
if (node->is_data)
continue;
- back = (struct tree_backref *)node;
+ back = to_tree_backref(node);
if (parent > 0) {
if (!node->full_backref)
continue;
@@ -4404,11 +4438,11 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
struct data_backref *back;
while(cur != &rec->backrefs) {
- node = list_entry(cur, struct extent_backref, list);
+ node = to_extent_backref(cur);
cur = cur->next;
if (!node->is_data)
continue;
- back = (struct data_backref *)node;
+ back = to_data_backref(node);
if (parent > 0) {
if (!node->full_backref)
continue;
@@ -4494,8 +4528,7 @@ static void check_extent_type(struct extent_record *rec)
struct tree_backref *tback;
u64 bg_type;
- node = list_entry(rec->backrefs.next, struct extent_backref,
- list);
+ node = to_extent_backref(rec->backrefs.next);
if (node->is_data) {
/* tree block shouldn't have data backref */
rec->wrong_chunk_type = 1;
@@ -6507,7 +6540,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
} else {
struct btrfs_disk_key copy_key;;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
bi = (struct btrfs_tree_block_info *)(ei + 1);
memset_extent_buffer(leaf, 0, (unsigned long)bi,
sizeof(*bi));
@@ -6536,7 +6569,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
u64 parent;
int i;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (back->full_backref)
parent = dback->parent;
else
@@ -6574,7 +6607,7 @@ static int record_extent(struct btrfs_trans_handle *trans,
} else {
u64 parent;
- tback = (struct tree_backref *)back;
+ tback = to_tree_backref(back);
if (back->full_backref)
parent = tback->parent;
else
@@ -6823,7 +6856,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/*
* We only pay attention to backrefs that we found a real
@@ -6949,7 +6982,7 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/*
* Still ignoring backrefs that don't have a real ref attached
@@ -7013,7 +7046,7 @@ static int process_duplicates(struct btrfs_root *root,
*/
remove_cache_extent(extent_cache, &rec->cache);
- good = list_entry(rec->dups.next, struct extent_record, list);
+ good = to_extent_record(rec->dups.next);
list_del_init(&good->list);
INIT_LIST_HEAD(&good->backrefs);
INIT_LIST_HEAD(&good->dups);
@@ -7147,7 +7180,7 @@ static int delete_duplicate_records(struct btrfs_root *root,
ret = err;
out:
while (!list_empty(&delete_list)) {
- tmp = list_entry(delete_list.next, struct extent_record, list);
+ tmp = to_extent_record(delete_list.next);
list_del_init(&tmp->list);
if (tmp == rec)
continue;
@@ -7155,7 +7188,7 @@ out:
}
while (!list_empty(&rec->dups)) {
- tmp = list_entry(rec->dups.next, struct extent_record, list);
+ tmp = to_extent_record(rec->dups.next);
list_del_init(&tmp->list);
free(tmp);
}
@@ -7187,7 +7220,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
if (back->full_backref || !back->is_data)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
/* We found this one, we don't need to do a lookup */
if (dback->found_ref)
@@ -7286,7 +7319,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
if (back->full_backref || !back->is_data ||
!back->found_extent_tree)
continue;
- dback = (struct data_backref *)back;
+ dback = to_data_backref(back);
if (dback->found_ref)
continue;
key.objectid = dback->root;
@@ -7403,7 +7436,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
/* step three, recreate all the refs we did find */
while(cur != &rec->backrefs) {
- back = list_entry(cur, struct extent_backref, list);
+ back = to_extent_backref(cur);
cur = cur->next;
/*
@@ -7660,8 +7693,7 @@ static int check_extent_refs(struct btrfs_root *root,
* belong to a different extent item and not the weird duplicate one.
*/
while (repair && !list_empty(&duplicate_extents)) {
- rec = list_entry(duplicate_extents.next, struct extent_record,
- list);
+ rec = to_extent_record(duplicate_extents.next);
list_del_init(&rec->list);
/* Sometimes we can find a backref before we find an actual
--
2.7.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
@ 2016-06-23 19:26 ` jeffm
2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
To: linux-btrfs
From: Jeff Mahoney <jeffm@suse.com>
For the pathlogical case, like xfstests generic/297 that creates a
large file consisting of one, repeating reflinked extent, fsck can
take hours. The root cause is that calling find_data_backref while
iterating the extent records is an O(n^2) algorithm. For my
example test run, n was 2*2^20 and fsck was at 8 hours and counting.
This patch supplements the list with an rbtree and drops the runtime
of that testcase to about 20 seconds.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
cmds-check.c | 199 ++++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 149 insertions(+), 50 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index a202a9d..4785f00 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -77,6 +77,7 @@ static struct cache_tree *roots_info_cache = NULL;
struct extent_backref {
struct list_head list;
+ struct rb_node node;
unsigned int is_data:1;
unsigned int found_extent_tree:1;
unsigned int full_backref:1;
@@ -90,6 +91,12 @@ to_extent_backref(struct list_head *entry)
return list_entry(entry, struct extent_backref, list);
}
+static inline struct extent_backref *
+rb_node_to_extent_backref(struct rb_node *node)
+{
+ return rb_entry(node, struct extent_backref, node);
+}
+
struct data_backref {
struct extent_backref node;
union {
@@ -111,6 +118,57 @@ to_data_backref(struct extent_backref *back)
return container_of(back, struct data_backref, node);
}
+static int compare_data_backref(struct rb_node *node1,
+ struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+ struct data_backref *back1 = to_data_backref(ext1);
+ struct data_backref *back2 = to_data_backref(ext2);
+
+ WARN_ON(!ext1->is_data);
+ WARN_ON(!ext2->is_data);
+
+ /* parent and root are a union, so this covers both */
+ if (back1->parent > back2->parent)
+ return 1;
+ if (back1->parent < back2->parent)
+ return -1;
+
+ /* This is a full backref and the parents match. */
+ if (back1->node.full_backref)
+ return 0;
+
+ if (back1->owner > back2->owner)
+ return 1;
+ if (back1->owner < back2->owner)
+ return -1;
+
+ if (back1->offset > back2->offset)
+ return 1;
+ if (back1->offset < back2->offset)
+ return -1;
+
+ if (back1->bytes > back2->bytes)
+ return 1;
+ if (back1->bytes < back2->bytes)
+ return -1;
+
+ if (back1->found_ref && back2->found_ref) {
+ if (back1->disk_bytenr > back2->disk_bytenr)
+ return 1;
+ if (back1->disk_bytenr < back2->disk_bytenr)
+ return -1;
+
+ if (back1->found_ref > back2->found_ref)
+ return 1;
+ if (back1->found_ref < back2->found_ref)
+ return -1;
+ }
+
+ return 0;
+}
+
/*
* Much like data_backref, just removed the undetermined members
* and change it to use list_head.
@@ -140,12 +198,56 @@ to_tree_backref(struct extent_backref *back)
return container_of(back, struct tree_backref, node);
}
+static int compare_tree_backref(struct rb_node *node1,
+ struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+ struct tree_backref *back1 = to_tree_backref(ext1);
+ struct tree_backref *back2 = to_tree_backref(ext2);
+
+ WARN_ON(ext1->is_data);
+ WARN_ON(ext2->is_data);
+
+ /* parent and root are a union, so this covers both */
+ if (back1->parent > back2->parent)
+ return 1;
+ if (back1->parent < back2->parent)
+ return -1;
+
+ return 0;
+}
+
+static int compare_extent_backref(struct rb_node *node1,
+ struct rb_node *node2)
+{
+ struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+ struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+
+ if (ext1->is_data > ext2->is_data)
+ return 1;
+
+ if (ext1->is_data < ext2->is_data)
+ return -1;
+
+ if (ext1->full_backref > ext2->full_backref)
+ return 1;
+ if (ext1->full_backref < ext2->full_backref)
+ return -1;
+
+ if (ext1->is_data)
+ return compare_data_backref(node1, node2);
+ else
+ return compare_tree_backref(node1, node2);
+}
+
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
struct extent_record {
struct list_head backrefs;
struct list_head dups;
+ struct rb_root backref_tree;
struct list_head list;
struct cache_extent cache;
struct btrfs_disk_key parent_key;
@@ -4379,32 +4481,30 @@ static int check_block(struct btrfs_root *root,
return ret;
}
+
static struct tree_backref *find_tree_backref(struct extent_record *rec,
u64 parent, u64 root)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
- struct tree_backref *back;
+ struct rb_node *node;
+ struct tree_backref *back = NULL;
+ struct tree_backref match = {
+ .node = {
+ .is_data = 0,
+ },
+ };
- while(cur != &rec->backrefs) {
- node = to_extent_backref(cur);
- cur = cur->next;
- if (node->is_data)
- continue;
- back = to_tree_backref(node);
- if (parent > 0) {
- if (!node->full_backref)
- continue;
- if (parent == back->parent)
- return back;
- } else {
- if (node->full_backref)
- continue;
- if (back->root == root)
- return back;
- }
- }
- return NULL;
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else
+ match.root = root;
+
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_tree_backref(rb_node_to_extent_backref(node));
+
+ return back;
}
static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
@@ -4423,6 +4523,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
ref->node.full_backref = 0;
}
list_add_tail(&ref->node.list, &rec->backrefs);
+ rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
return ref;
}
@@ -4433,35 +4534,31 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
int found_ref,
u64 disk_bytenr, u64 bytes)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
- struct data_backref *back;
+ struct rb_node *node;
+ struct data_backref *back = NULL;
+ struct data_backref match = {
+ .node = {
+ .is_data = 1,
+ },
+ .owner = owner,
+ .offset = offset,
+ .bytes = bytes,
+ .found_ref = found_ref,
+ .disk_bytenr = disk_bytenr,
+ };
- while(cur != &rec->backrefs) {
- node = to_extent_backref(cur);
- cur = cur->next;
- if (!node->is_data)
- continue;
- back = to_data_backref(node);
- if (parent > 0) {
- if (!node->full_backref)
- continue;
- if (parent == back->parent)
- return back;
- } else {
- if (node->full_backref)
- continue;
- if (back->root == root && back->owner == owner &&
- back->offset == offset) {
- if (found_ref && node->found_ref &&
- (back->bytes != bytes ||
- back->disk_bytenr != disk_bytenr))
- continue;
- return back;
- }
- }
- }
- return NULL;
+ if (parent) {
+ match.parent = parent;
+ match.node.full_backref = 1;
+ } else
+ match.root = root;
+
+ node = rb_search(&rec->backref_tree, &match.node.node,
+ (rb_compare_keys)compare_extent_backref, NULL);
+ if (node)
+ back = to_data_backref(rb_node_to_extent_backref(node));
+
+ return back;
}
static struct data_backref *alloc_data_backref(struct extent_record *rec,
@@ -4491,6 +4588,7 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
ref->found_ref = 0;
ref->num_refs = 0;
list_add_tail(&ref->node.list, &rec->backrefs);
+ rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
if (max_size > rec->max_size)
rec->max_size = max_size;
return ref;
@@ -4578,6 +4676,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->dups);
INIT_LIST_HEAD(&rec->list);
+ rec->backref_tree = RB_ROOT;
memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
rec->cache.start = tmpl->start;
rec->cache.size = tmpl->nr;
--
2.7.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
@ 2016-06-23 19:26 ` jeffm
2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
2016-07-04 12:20 ` David Sterba
4 siblings, 0 replies; 7+ messages in thread
From: jeffm @ 2016-06-23 19:26 UTC (permalink / raw)
To: linux-btrfs
From: Jeff Mahoney <jeffm@suse.com>
We now have two data structures that can be used to iterate the same data
set, and there may be quite a few of them in memory. Eliminating the
list_head member will reduce memory consumption while iterating over
the extent backrefs.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
cmds-check.c | 88 ++++++++++++++++++++++++++----------------------------------
1 file changed, 38 insertions(+), 50 deletions(-)
diff --git a/cmds-check.c b/cmds-check.c
index 4785f00..6434857 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -76,7 +76,6 @@ static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
struct extent_backref {
- struct list_head list;
struct rb_node node;
unsigned int is_data:1;
unsigned int found_extent_tree:1;
@@ -86,12 +85,6 @@ struct extent_backref {
};
static inline struct extent_backref *
-to_extent_backref(struct list_head *entry)
-{
- return list_entry(entry, struct extent_backref, list);
-}
-
-static inline struct extent_backref *
rb_node_to_extent_backref(struct rb_node *node)
{
return rb_entry(node, struct extent_backref, node);
@@ -3879,16 +3872,15 @@ out:
static int all_backpointers_checked(struct extent_record *rec, int print_errs)
{
- struct list_head *cur = rec->backrefs.next;
+ struct rb_node *n;
struct extent_backref *back;
struct tree_backref *tback;
struct data_backref *dback;
u64 found = 0;
int err = 0;
- while(cur != &rec->backrefs) {
- back = to_extent_backref(cur);
- cur = cur->next;
+ for (n = rb_first(&rec->backref_tree); n; n = rb_next(n)) {
+ back = rb_node_to_extent_backref(n);
if (!back->found_extent_tree) {
err = 1;
if (!print_errs)
@@ -3991,17 +3983,15 @@ out:
return err;
}
-static int free_all_extent_backrefs(struct extent_record *rec)
+static void __free_one_backref(struct rb_node *node)
{
- struct extent_backref *back;
- struct list_head *cur;
- while (!list_empty(&rec->backrefs)) {
- cur = rec->backrefs.next;
- back = to_extent_backref(cur);
- list_del(cur);
- free(back);
- }
- return 0;
+ struct extent_backref *back = rb_node_to_extent_backref(node);
+ free(back);
+}
+
+static void free_all_extent_backrefs(struct extent_record *rec)
+{
+ rb_free_nodes(&rec->backref_tree, __free_one_backref);
}
static void free_extent_record_cache(struct btrfs_fs_info *fs_info,
@@ -4041,7 +4031,7 @@ static int check_owner_ref(struct btrfs_root *root,
struct extent_record *rec,
struct extent_buffer *buf)
{
- struct extent_backref *node;
+ struct extent_backref *node, *tmp;
struct tree_backref *back;
struct btrfs_root *ref_root;
struct btrfs_key key;
@@ -4051,7 +4041,8 @@ static int check_owner_ref(struct btrfs_root *root,
int found = 0;
int ret;
- list_for_each_entry(node, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(node, tmp,
+ &rec->backref_tree, node) {
if (node->is_data)
continue;
if (!node->found_ref)
@@ -4096,18 +4087,16 @@ static int check_owner_ref(struct btrfs_root *root,
static int is_extent_tree_record(struct extent_record *rec)
{
- struct list_head *cur = rec->backrefs.next;
- struct extent_backref *node;
+ struct extent_backref *ref, *tmp;
struct tree_backref *back;
int is_extent = 0;
- while(cur != &rec->backrefs) {
- node = to_extent_backref(cur);
- cur = cur->next;
- if (node->is_data)
+ rbtree_postorder_for_each_entry_safe(ref, tmp,
+ &rec->backref_tree, node) {
+ if (ref->is_data)
return 0;
- back = to_tree_backref(node);
- if (node->full_backref)
+ back = to_tree_backref(ref);
+ if (ref->full_backref)
return 0;
if (back->root == BTRFS_EXTENT_TREE_OBJECTID)
is_extent = 1;
@@ -4522,7 +4511,6 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
ref->root = root;
ref->node.full_backref = 0;
}
- list_add_tail(&ref->node.list, &rec->backrefs);
rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
return ref;
@@ -4587,7 +4575,6 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
ref->bytes = max_size;
ref->found_ref = 0;
ref->num_refs = 0;
- list_add_tail(&ref->node.list, &rec->backrefs);
rb_insert(&rec->backref_tree, &ref->node.node, compare_extent_backref);
if (max_size > rec->max_size)
rec->max_size = max_size;
@@ -4621,12 +4608,12 @@ static void check_extent_type(struct extent_record *rec)
* Check SYSTEM extent, as it's also marked as metadata, we can only
* make sure it's a SYSTEM extent by its backref
*/
- if (!list_empty(&rec->backrefs)) {
+ if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
struct extent_backref *node;
struct tree_backref *tback;
u64 bg_type;
- node = to_extent_backref(rec->backrefs.next);
+ node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
if (node->is_data) {
/* tree block shouldn't have data backref */
rec->wrong_chunk_type = 1;
@@ -6479,7 +6466,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
back->node.found_extent_tree = 0;
if (!back->node.found_extent_tree && back->node.found_ref) {
- list_del(&back->node.list);
+ rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
} else {
@@ -6498,7 +6485,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
back->node.found_extent_tree = 0;
}
if (!back->node.found_extent_tree && back->node.found_ref) {
- list_del(&back->node.list);
+ rb_erase(&back->node.node, &rec->backref_tree);
free(back);
}
}
@@ -6935,7 +6922,7 @@ out:
static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
struct extent_record *rec)
{
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct extent_entry *entry, *best = NULL;
LIST_HEAD(entries);
@@ -6951,7 +6938,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
if (rec->metadata)
return 0;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
@@ -7077,7 +7065,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
* Ok great we all agreed on an extent record, let's go find the real
* references and fix up the ones that don't match.
*/
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data)
continue;
@@ -7306,7 +7295,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
struct extent_record *rec)
{
struct btrfs_root *root;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct cache_extent *cache;
struct btrfs_file_extent_item *fi;
@@ -7314,7 +7303,8 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
u64 bytenr, bytes;
int ret;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
/* Don't care about full backrefs (poor unloved backrefs) */
if (back->full_backref || !back->is_data)
continue;
@@ -7402,7 +7392,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
{
struct btrfs_key key;
struct btrfs_root *dest_root;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
struct data_backref *dback;
struct orphan_data_extent *orphan;
struct btrfs_path *path;
@@ -7414,7 +7404,8 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- list_for_each_entry(back, &rec->backrefs, list) {
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
if (back->full_backref || !back->is_data ||
!back->found_extent_tree)
continue;
@@ -7481,9 +7472,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
struct btrfs_trans_handle *trans = NULL;
int ret;
struct btrfs_path *path;
- struct list_head *cur = rec->backrefs.next;
struct cache_extent *cache;
- struct extent_backref *back;
+ struct extent_backref *back, *tmp;
int allocated = 0;
u64 flags = 0;
@@ -7534,10 +7524,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
}
/* step three, recreate all the refs we did find */
- while(cur != &rec->backrefs) {
- back = to_extent_backref(cur);
- cur = cur->next;
-
+ rbtree_postorder_for_each_entry_safe(back, tmp,
+ &rec->backref_tree, node) {
/*
* if we didn't find any references, don't create a
* new extent record
--
2.7.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
` (2 preceding siblings ...)
2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
@ 2016-06-23 21:24 ` Holger Hoffstätte
2016-06-23 21:38 ` Jeff Mahoney
2016-07-04 12:20 ` David Sterba
4 siblings, 1 reply; 7+ messages in thread
From: Holger Hoffstätte @ 2016-06-23 21:24 UTC (permalink / raw)
To: jeffm, linux-btrfs
On 06/23/16 21:26, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
>
> While running xfstests generic/291, which creates a single file populated
> with reflinks to the same extent, I found that fsck had been running for
> hours. perf top lead me to find_data_backref as the culprit, and a litte
> more digging made it clear: For every extent record we add, we iterate
> the entire list first. My test case had ~2M records. That math doesn't
> go well.
>
> This patchset converts the extent_backref list to an rbtree. The test
> that used to run for more than 8 hours without completing now takes
> less than 20 seconds.
Very interesting. Time for science!
unpatched:
holger>time btrfs check /dev/sdc1
Checking filesystem on /dev/sdc1
..
btrfs check /dev/sdc1 17.03s user 3.79s system 25% cpu 1:22.82 total
patched:
holger>time btrfs check /dev/sdc1
Checking filesystem on /dev/sdc1
..
btrfs check /dev/sdc1 17.03s user 3.74s system 24% cpu 1:23.24 total
This is a 1TB disk with ~850GB data in 4 subvolumes, ~2 snapshots each.
I guess it only starts to matter (relative to the necessary I/O cost per
extent) when the level of sharing is higher, i.e. many more snapshots?
OTOH it doesn't explode, so that's good. :)
cheers
Holger
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
@ 2016-06-23 21:38 ` Jeff Mahoney
0 siblings, 0 replies; 7+ messages in thread
From: Jeff Mahoney @ 2016-06-23 21:38 UTC (permalink / raw)
To: Holger Hoffstätte, linux-btrfs
[-- Attachment #1.1: Type: text/plain, Size: 2010 bytes --]
On 6/23/16 5:24 PM, Holger Hoffstätte wrote:
> On 06/23/16 21:26, jeffm@suse.com wrote:
>> From: Jeff Mahoney <jeffm@suse.com>
Hi Holger -
>> While running xfstests generic/291, which creates a single file populated
Whoops, this should've been generic/297.
>> with reflinks to the same extent, I found that fsck had been running for
>> hours. perf top lead me to find_data_backref as the culprit, and a litte
>> more digging made it clear: For every extent record we add, we iterate
>> the entire list first. My test case had ~2M records. That math doesn't
>> go well.
>>
>> This patchset converts the extent_backref list to an rbtree. The test
>> that used to run for more than 8 hours without completing now takes
>> less than 20 seconds.
>
> Very interesting. Time for science!
>
> unpatched:
>
> holger>time btrfs check /dev/sdc1
> Checking filesystem on /dev/sdc1
> ..
> btrfs check /dev/sdc1 17.03s user 3.79s system 25% cpu 1:22.82 total
>
> patched:
>
> holger>time btrfs check /dev/sdc1
> Checking filesystem on /dev/sdc1
> ..
> btrfs check /dev/sdc1 17.03s user 3.74s system 24% cpu 1:23.24 total
>
> This is a 1TB disk with ~850GB data in 4 subvolumes, ~2 snapshots each.
> I guess it only starts to matter (relative to the necessary I/O cost per
> extent) when the level of sharing is higher, i.e. many more snapshots?
Thanks for testing. This is exactly the result I wanted to see -- that
the impact on the regular case is minimal. Once the number of reflinks
to a single extent rises significantly, the improvement is clear. My
test case was an 8 GB file with every block referencing the first block
of the file. You're welcome to try that test case as well, but I will
warn you *not* to run filefrag on it or you'll either have to wait a
really long time for it to complete or reboot your system.
> OTOH it doesn't explode, so that's good. :)
Hey, I'll take that feedback. :)
-Jeff
--
Jeff Mahoney
SUSE Labs
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 881 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
` (3 preceding siblings ...)
2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
@ 2016-07-04 12:20 ` David Sterba
4 siblings, 0 replies; 7+ messages in thread
From: David Sterba @ 2016-07-04 12:20 UTC (permalink / raw)
To: jeffm; +Cc: linux-btrfs
On Thu, Jun 23, 2016 at 03:26:03PM -0400, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
>
> While running xfstests generic/291, which creates a single file populated
> with reflinks to the same extent, I found that fsck had been running for
> hours. perf top lead me to find_data_backref as the culprit, and a litte
> more digging made it clear: For every extent record we add, we iterate
> the entire list first. My test case had ~2M records. That math doesn't
> go well.
>
> This patchset converts the extent_backref list to an rbtree. The test
> that used to run for more than 8 hours without completing now takes
> less than 20 seconds.
>
> -Jeff
>
> ---
>
> Jeff Mahoney (3):
> btrfs-progs: check: add helpers for converting between structures
> btrfs-progs: check: supplement extent backref list with rbtree
> btrfs-progs: check: switch to iterating over the backref_tree
Applied (with some trivial fixups), thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2016-07-04 12:20 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-23 19:26 [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability jeffm
2016-06-23 19:26 ` [PATCH 1/3] btrfs-progs: check: add helpers for converting between structures jeffm
2016-06-23 19:26 ` [PATCH 2/3] btrfs-progs: check: supplement extent backref list with rbtree jeffm
2016-06-23 19:26 ` [PATCH 3/3] btrfs-progs: check: switch to iterating over the backref_tree jeffm
2016-06-23 21:24 ` [PATCH 0/3] btrfs-progs: check improve 'checking extents' scalability Holger Hoffstätte
2016-06-23 21:38 ` Jeff Mahoney
2016-07-04 12:20 ` David Sterba
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.