All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree
@ 2017-07-25 20:51 jeffm
  2017-07-25 20:51 ` [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree jeffm
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

For the pathlogical case, like xfstests generic/297 that creates a
large file consisting of one, repeating reflinked extent, fsck can
take hours.  The root cause is that calling find_data_backref while
iterating the extent records is an O(n^2) algorithm.  For my
example test run, n was 2*2^20 and fsck was at 8 hours and counting.

This patch supplements the list with an rbtree and drops the runtime
of that testcase to about 20 seconds.

A previous version of this patch introduced a regression that would
have corrupted file systems during repair.  It was traced to the
compare algorithm honoring ->bytes regardless of whether the
reference had been found and a failure to reinsert nodes after
the target reference was found.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 cmds-check.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 180 insertions(+), 4 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 23adc03..6a04553 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -87,6 +87,7 @@ static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
 
 struct extent_backref {
 	struct list_head list;
+	struct rb_node node;
 	unsigned int is_data:1;
 	unsigned int found_extent_tree:1;
 	unsigned int full_backref:1;
@@ -99,6 +100,11 @@ static inline struct extent_backref* to_extent_backref(struct list_head *entry)
 	return list_entry(entry, struct extent_backref, list);
 }
 
+static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
+{
+	return rb_entry(node, struct extent_backref, node);
+}
+
 struct data_backref {
 	struct extent_backref node;
 	union {
@@ -137,6 +143,51 @@ static inline struct data_backref* to_data_backref(struct extent_backref *back)
 	return container_of(back, struct data_backref, node);
 }
 
+static int compare_data_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct data_backref *back1 = to_data_backref(ext1);
+	struct data_backref *back2 = to_data_backref(ext2);
+
+	WARN_ON(!ext1->is_data);
+	WARN_ON(!ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (back1->parent > back2->parent)
+		return 1;
+	if (back1->parent < back2->parent)
+		return -1;
+
+	/* This is a full backref and the parents match. */
+	if (back1->node.full_backref)
+		return 0;
+
+	if (back1->owner > back2->owner)
+		return 1;
+	if (back1->owner < back2->owner)
+		return -1;
+
+	if (back1->offset > back2->offset)
+		return 1;
+	if (back1->offset < back2->offset)
+		return -1;
+
+	if (back1->found_ref && back2->found_ref) {
+		if (back1->disk_bytenr > back2->disk_bytenr)
+			return 1;
+		if (back1->disk_bytenr < back2->disk_bytenr)
+			return -1;
+
+		if (back1->bytes > back2->bytes)
+			return 1;
+		if (back1->bytes < back2->bytes)
+			return -1;
+	}
+
+	return 0;
+}
+
 /*
  * Much like data_backref, just removed the undetermined members
  * and change it to use list_head.
@@ -165,12 +216,54 @@ static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
 	return container_of(back, struct tree_backref, node);
 }
 
+static int compare_tree_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+	struct tree_backref *back1 = to_tree_backref(ext1);
+	struct tree_backref *back2 = to_tree_backref(ext2);
+
+	WARN_ON(ext1->is_data);
+	WARN_ON(ext2->is_data);
+
+	/* parent and root are a union, so this covers both */
+	if (back1->parent > back2->parent)
+		return 1;
+	if (back1->parent < back2->parent)
+		return -1;
+
+	return 0;
+}
+
+static int compare_extent_backref(struct rb_node *node1, struct rb_node *node2)
+{
+	struct extent_backref *ext1 = rb_node_to_extent_backref(node1);
+	struct extent_backref *ext2 = rb_node_to_extent_backref(node2);
+
+	if (ext1->is_data > ext2->is_data)
+		return 1;
+
+	if (ext1->is_data < ext2->is_data)
+		return -1;
+
+	if (ext1->full_backref > ext2->full_backref)
+		return 1;
+	if (ext1->full_backref < ext2->full_backref)
+		return -1;
+
+	if (ext1->is_data)
+		return compare_data_backref(node1, node2);
+	else
+		return compare_tree_backref(node1, node2);
+}
+
 /* Explicit initialization for extent_record::flag_block_full_backref */
 enum { FLAG_UNSET = 2 };
 
 struct extent_record {
 	struct list_head backrefs;
 	struct list_head dups;
+	struct rb_root backref_tree;
 	struct list_head list;
 	struct cache_extent cache;
 	struct btrfs_disk_key parent_key;
@@ -5051,6 +5144,65 @@ out:
 	return ret;
 }
 
+static struct tree_backref *find_tree_backref(struct extent_record *rec,
+						u64 parent, u64 root)
+{
+	struct rb_node *node;
+	struct tree_backref *back = NULL;
+	struct tree_backref match = {
+		.node = {
+			.is_data = 0,
+		},
+	};
+
+	if (parent) {
+		match.parent = parent;
+		match.node.full_backref = 1;
+	} else {
+		match.root = root;
+	}
+
+	node = rb_search(&rec->backref_tree, &match.node.node,
+			 (rb_compare_keys)compare_extent_backref, NULL);
+	if (node)
+		back = to_tree_backref(rb_node_to_extent_backref(node));
+
+	return back;
+}
+
+static struct data_backref *find_data_backref(struct extent_record *rec,
+						u64 parent, u64 root,
+						u64 owner, u64 offset,
+						int found_ref,
+						u64 disk_bytenr, u64 bytes)
+{
+	struct rb_node *node;
+	struct data_backref *back = NULL;
+	struct data_backref match = {
+		.node = {
+			.is_data = 1,
+		},
+		.owner = owner,
+		.offset = offset,
+		.bytes = bytes,
+		.found_ref = found_ref,
+		.disk_bytenr = disk_bytenr,
+	};
+
+	if (parent) {
+		match.parent = parent;
+		match.node.full_backref = 1;
+	} else {
+		match.root = root;
+	}
+
+	node = rb_search(&rec->backref_tree, &match.node.node,
+			 (rb_compare_keys)compare_extent_backref, NULL);
+	if (node)
+		back = to_data_backref(rb_node_to_extent_backref(node));
+
+	return back;
+}
 /*
  * Iterate all item on the tree and call check_inode_item() to check.
  *
@@ -5316,7 +5468,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 				goto out;
 			if (back->is_data) {
 				dback = to_data_backref(back);
-				fprintf(stderr, "Backref %llu %s %llu"
+				fprintf(stderr, "Data backref %llu %s %llu"
 					" owner %llu offset %llu num_refs %lu"
 					" not found in extent tree\n",
 					(unsigned long long)rec->start,
@@ -5330,7 +5482,7 @@ static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 					(unsigned long)dback->num_refs);
 			} else {
 				tback = to_tree_backref(back);
-				fprintf(stderr, "Backref %llu parent %llu"
+				fprintf(stderr, "Tree backref %llu parent %llu"
 					" root %llu not found in extent tree\n",
 					(unsigned long long)rec->start,
 					(unsigned long long)tback->parent,
@@ -5888,6 +6040,7 @@ static int check_block(struct btrfs_root *root,
 	return ret;
 }
 
+#if 0
 static struct tree_backref *find_tree_backref(struct extent_record *rec,
 						u64 parent, u64 root)
 {
@@ -5915,6 +6068,7 @@ static struct tree_backref *find_tree_backref(struct extent_record *rec,
 	}
 	return NULL;
 }
+#endif
 
 static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 						u64 parent, u64 root)
@@ -5936,6 +6090,7 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 	return ref;
 }
 
+#if 0
 static struct data_backref *find_data_backref(struct extent_record *rec,
 						u64 parent, u64 root,
 						u64 owner, u64 offset,
@@ -5972,6 +6127,7 @@ static struct data_backref *find_data_backref(struct extent_record *rec,
 	}
 	return NULL;
 }
+#endif
 
 static struct data_backref *alloc_data_backref(struct extent_record *rec,
 						u64 parent, u64 root,
@@ -6088,6 +6244,7 @@ static int add_extent_rec_nolookup(struct cache_tree *extent_cache,
 	INIT_LIST_HEAD(&rec->backrefs);
 	INIT_LIST_HEAD(&rec->dups);
 	INIT_LIST_HEAD(&rec->list);
+	rec->backref_tree = RB_ROOT;
 	memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key));
 	rec->cache.start = tmpl->start;
 	rec->cache.size = tmpl->nr;
@@ -6220,6 +6377,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 	struct tree_backref *back;
 	struct cache_extent *cache;
 	int ret;
+	int insert = false;
 
 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
 	if (!cache) {
@@ -6254,6 +6412,7 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 		back = alloc_tree_backref(rec, parent, root);
 		if (!back)
 			return -ENOMEM;
+		insert = true;
 	}
 
 	if (found_ref) {
@@ -6275,6 +6434,9 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 		}
 		back->node.found_extent_tree = 1;
 	}
+	if (insert)
+		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
+			compare_extent_backref));
 	check_extent_type(rec);
 	maybe_free_extent_rec(extent_cache, rec);
 	return 0;
@@ -6288,6 +6450,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 	struct data_backref *back;
 	struct cache_extent *cache;
 	int ret;
+	int insert = false;
 
 	cache = lookup_cache_extent(extent_cache, bytenr, 1);
 	if (!cache) {
@@ -6327,6 +6490,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 		back = alloc_data_backref(rec, parent, root, owner, offset,
 					  max_size);
 		BUG_ON(!back);
+		insert = true;
 	}
 
 	if (found_ref) {
@@ -6335,8 +6499,16 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 			BUG_ON(back->bytes != max_size);
 		back->node.found_ref = 1;
 		back->found_ref += 1;
-		back->bytes = max_size;
-		back->disk_bytenr = bytenr;
+		if (back->bytes != max_size || back->disk_bytenr != bytenr) {
+			back->bytes = max_size;
+			back->disk_bytenr = bytenr;
+
+			/* Need to reinsert if not already in the tree */
+			if (!insert) {
+				rb_erase(&back->node.node, &rec->backref_tree);
+				insert = true;
+			}
+		}
 		rec->refs += 1;
 		rec->content_checked = 1;
 		rec->owner_ref_checked = 1;
@@ -6355,6 +6527,10 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
 		back->num_refs = num_refs;
 		back->node.found_extent_tree = 1;
 	}
+	if (insert)
+		WARN_ON(rb_insert(&rec->backref_tree, &back->node.node,
+			compare_extent_backref));
+
 	maybe_free_extent_rec(extent_cache, rec);
 	return 0;
 }
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

We now have two data structures that can be used to iterate the same data
set, and there may be quite a few of them in memory.  Eliminating the
list_head member will reduce memory consumption while iterating over
the extent backrefs.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 cmds-check.c | 83 ++++++++++++++++++++++++++----------------------------------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 6a04553..b46a686 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -86,7 +86,6 @@ enum btrfs_check_mode {
 static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
 
 struct extent_backref {
-	struct list_head list;
 	struct rb_node node;
 	unsigned int is_data:1;
 	unsigned int found_extent_tree:1;
@@ -95,11 +94,6 @@ struct extent_backref {
 	unsigned int broken:1;
 };
 
-static inline struct extent_backref* to_extent_backref(struct list_head *entry)
-{
-	return list_entry(entry, struct extent_backref, list);
-}
-
 static inline struct extent_backref* rb_node_to_extent_backref(struct rb_node *node)
 {
 	return rb_entry(node, struct extent_backref, node);
@@ -5452,16 +5446,14 @@ out:
 
 static int all_backpointers_checked(struct extent_record *rec, int print_errs)
 {
-	struct list_head *cur = rec->backrefs.next;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct tree_backref *tback;
 	struct data_backref *dback;
 	u64 found = 0;
 	int err = 0;
 
-	while(cur != &rec->backrefs) {
-		back = to_extent_backref(cur);
-		cur = cur->next;
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (!back->found_extent_tree) {
 			err = 1;
 			if (!print_errs)
@@ -5564,17 +5556,16 @@ out:
 	return err;
 }
 
-static int free_all_extent_backrefs(struct extent_record *rec)
+static void __free_one_backref(struct rb_node *node)
 {
-	struct extent_backref *back;
-	struct list_head *cur;
-	while (!list_empty(&rec->backrefs)) {
-		cur = rec->backrefs.next;
-		back = to_extent_backref(cur);
-		list_del(cur);
-		free(back);
-	}
-	return 0;
+	struct extent_backref *back = rb_node_to_extent_backref(node);
+
+	free(back);
+}
+
+static void free_all_extent_backrefs(struct extent_record *rec)
+{
+	rb_free_nodes(&rec->backref_tree, __free_one_backref);
 }
 
 static void free_extent_record_cache(struct cache_tree *extent_cache)
@@ -5613,7 +5604,7 @@ static int check_owner_ref(struct btrfs_root *root,
 			    struct extent_record *rec,
 			    struct extent_buffer *buf)
 {
-	struct extent_backref *node;
+	struct extent_backref *node, *tmp;
 	struct tree_backref *back;
 	struct btrfs_root *ref_root;
 	struct btrfs_key key;
@@ -5623,7 +5614,8 @@ static int check_owner_ref(struct btrfs_root *root,
 	int found = 0;
 	int ret;
 
-	list_for_each_entry(node, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(node, tmp,
+					     &rec->backref_tree, node) {
 		if (node->is_data)
 			continue;
 		if (!node->found_ref)
@@ -5668,14 +5660,12 @@ static int check_owner_ref(struct btrfs_root *root,
 
 static int is_extent_tree_record(struct extent_record *rec)
 {
-	struct list_head *cur = rec->backrefs.next;
-	struct extent_backref *node;
+	struct extent_backref *node, *tmp;
 	struct tree_backref *back;
 	int is_extent = 0;
 
-	while(cur != &rec->backrefs) {
-		node = to_extent_backref(cur);
-		cur = cur->next;
+	rbtree_postorder_for_each_entry_safe(node, tmp,
+					     &rec->backref_tree, node) {
 		if (node->is_data)
 			return 0;
 		back = to_tree_backref(node);
@@ -6085,7 +6075,6 @@ static struct tree_backref *alloc_tree_backref(struct extent_record *rec,
 		ref->root = root;
 		ref->node.full_backref = 0;
 	}
-	list_add_tail(&ref->node.list, &rec->backrefs);
 
 	return ref;
 }
@@ -6155,7 +6144,6 @@ static struct data_backref *alloc_data_backref(struct extent_record *rec,
 	ref->bytes = max_size;
 	ref->found_ref = 0;
 	ref->num_refs = 0;
-	list_add_tail(&ref->node.list, &rec->backrefs);
 	if (max_size > rec->max_size)
 		rec->max_size = max_size;
 	return ref;
@@ -6188,12 +6176,12 @@ static void check_extent_type(struct extent_record *rec)
 	 * Check SYSTEM extent, as it's also marked as metadata, we can only
 	 * make sure it's a SYSTEM extent by its backref
 	 */
-	if (!list_empty(&rec->backrefs)) {
+	if (!RB_EMPTY_ROOT(&rec->backref_tree)) {
 		struct extent_backref *node;
 		struct tree_backref *tback;
 		u64 bg_type;
 
-		node = to_extent_backref(rec->backrefs.next);
+		node = rb_node_to_extent_backref(rb_first(&rec->backref_tree));
 		if (node->is_data) {
 			/* tree block shouldn't have data backref */
 			rec->wrong_chunk_type = 1;
@@ -8191,7 +8179,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
 			back->node.found_extent_tree = 0;
 
 		if (!back->node.found_extent_tree && back->node.found_ref) {
-			list_del(&back->node.list);
+			rb_erase(&back->node.node, &rec->backref_tree);
 			free(back);
 		}
 	} else {
@@ -8210,7 +8198,7 @@ static int free_extent_hook(struct btrfs_trans_handle *trans,
 			back->node.found_extent_tree = 0;
 		}
 		if (!back->node.found_extent_tree && back->node.found_ref) {
-			list_del(&back->node.list);
+			rb_erase(&back->node.node, &rec->backref_tree);
 			free(back);
 		}
 	}
@@ -8651,7 +8639,7 @@ out:
 static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 			   struct extent_record *rec)
 {
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct extent_entry *entry, *best = NULL;
 	LIST_HEAD(entries);
@@ -8667,7 +8655,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 	if (rec->metadata)
 		return 0;
 
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data)
 			continue;
 
@@ -8793,7 +8782,8 @@ static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path,
 	 * Ok great we all agreed on an extent record, let's go find the real
 	 * references and fix up the ones that don't match.
 	 */
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data)
 			continue;
 
@@ -9017,7 +9007,7 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
 				  struct extent_record *rec)
 {
 	struct btrfs_root *root;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct cache_extent *cache;
 	struct btrfs_file_extent_item *fi;
@@ -9025,7 +9015,8 @@ static int find_possible_backrefs(struct btrfs_fs_info *info,
 	u64 bytenr, bytes;
 	int ret;
 
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		/* Don't care about full backrefs (poor unloved backrefs) */
 		if (back->full_backref || !back->is_data)
 			continue;
@@ -9113,7 +9104,7 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_key key;
 	struct btrfs_root *dest_root;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	struct data_backref *dback;
 	struct orphan_data_extent *orphan;
 	struct btrfs_path path;
@@ -9123,7 +9114,8 @@ static int record_orphan_data_extents(struct btrfs_fs_info *fs_info,
 	if (rec->metadata)
 		return 1;
 	btrfs_init_path(&path);
-	list_for_each_entry(back, &rec->backrefs, list) {
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		if (back->full_backref || !back->is_data ||
 		    !back->found_extent_tree)
 			continue;
@@ -9191,9 +9183,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
 	struct btrfs_trans_handle *trans = NULL;
 	int ret;
 	struct btrfs_path path;
-	struct list_head *cur = rec->backrefs.next;
 	struct cache_extent *cache;
-	struct extent_backref *back;
+	struct extent_backref *back, *tmp;
 	int allocated = 0;
 	u64 flags = 0;
 
@@ -9241,10 +9232,8 @@ static int fixup_extent_refs(struct btrfs_fs_info *info,
 	}
 
 	/* step three, recreate all the refs we did find */
-	while(cur != &rec->backrefs) {
-		back = to_extent_backref(cur);
-		cur = cur->next;
-
+	rbtree_postorder_for_each_entry_safe(back, tmp,
+					     &rec->backref_tree, node) {
 		/*
 		 * if we didn't find any references, don't create a
 		 * new extent record
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
  2017-07-25 20:51 ` [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-07-26  7:00   ` Nikolay Borisov
  2017-08-22 15:44   ` David Sterba
  2017-07-25 20:51 ` [PATCH 4/7] btrfs-progs: backref: push state tracking into a helper structure jeffm
                   ` (4 subsequent siblings)
  6 siblings, 2 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

We have the infrastructure to cache extent buffers but we don't actually
do the caching.  As soon as the last reference is dropped, the buffer
is dropped.  This patch keeps the extent buffers around until the max
cache size is reached (defaults to 25% of memory) and then it drops
the last 10% of the LRU to free up cache space for reallocation.  The
cache size is configurable (for use by e.g. lowmem) when the cache is
initialized.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 extent_io.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
 extent_io.h |  4 ++++
 utils.c     | 12 ++++++++++
 utils.h     |  3 +++
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/extent_io.c b/extent_io.c
index 915c6ed..937ff90 100644
--- a/extent_io.c
+++ b/extent_io.c
@@ -27,6 +27,7 @@
 #include "list.h"
 #include "ctree.h"
 #include "volumes.h"
+#include "utils.h"
 #include "internal.h"
 
 void extent_io_tree_init(struct extent_io_tree *tree)
@@ -35,6 +36,14 @@ void extent_io_tree_init(struct extent_io_tree *tree)
 	cache_tree_init(&tree->cache);
 	INIT_LIST_HEAD(&tree->lru);
 	tree->cache_size = 0;
+	tree->max_cache_size = (u64)(total_memory() * 1024) / 4;
+}
+
+void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
+				   u64 max_cache_size)
+{
+	extent_io_tree_init(tree);
+	tree->max_cache_size = max_cache_size;
 }
 
 static struct extent_state *alloc_extent_state(void)
@@ -67,16 +76,20 @@ static void free_extent_state_func(struct cache_extent *cache)
 	btrfs_free_extent_state(es);
 }
 
+static void free_extent_buffer_final(struct extent_buffer *eb);
 void extent_io_tree_cleanup(struct extent_io_tree *tree)
 {
 	struct extent_buffer *eb;
 
 	while(!list_empty(&tree->lru)) {
 		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
-		fprintf(stderr, "extent buffer leak: "
-			"start %llu len %u\n",
-			(unsigned long long)eb->start, eb->len);
-		free_extent_buffer(eb);
+		if (eb->refs) {
+			fprintf(stderr, "extent buffer leak: "
+				"start %llu len %u\n",
+				(unsigned long long)eb->start, eb->len);
+			free_extent_buffer_nocache(eb);
+		} else
+			free_extent_buffer_final(eb);
 	}
 
 	cache_tree_free_extents(&tree->state, free_extent_state_func);
@@ -567,7 +580,21 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
 	return new;
 }
 
-void free_extent_buffer(struct extent_buffer *eb)
+static void free_extent_buffer_final(struct extent_buffer *eb)
+{
+	struct extent_io_tree *tree = eb->tree;
+
+	BUG_ON(eb->refs);
+	BUG_ON(tree->cache_size < eb->len);
+	list_del_init(&eb->lru);
+	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
+		remove_cache_extent(&tree->cache, &eb->cache_node);
+		tree->cache_size -= eb->len;
+	}
+	free(eb);
+}
+
+static void free_extent_buffer_internal(struct extent_buffer *eb, int free_now)
 {
 	if (!eb || IS_ERR(eb))
 		return;
@@ -575,19 +602,23 @@ void free_extent_buffer(struct extent_buffer *eb)
 	eb->refs--;
 	BUG_ON(eb->refs < 0);
 	if (eb->refs == 0) {
-		struct extent_io_tree *tree = eb->tree;
 		BUG_ON(eb->flags & EXTENT_DIRTY);
-		list_del_init(&eb->lru);
 		list_del_init(&eb->recow);
-		if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
-			BUG_ON(tree->cache_size < eb->len);
-			remove_cache_extent(&tree->cache, &eb->cache_node);
-			tree->cache_size -= eb->len;
-		}
-		free(eb);
+		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
+			free_extent_buffer_final(eb);
 	}
 }
 
+void free_extent_buffer(struct extent_buffer *eb)
+{
+	free_extent_buffer_internal(eb, 0);
+}
+
+void free_extent_buffer_nocache(struct extent_buffer *eb)
+{
+	free_extent_buffer_internal(eb, 1);
+}
+
 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 					 u64 bytenr, u32 blocksize)
 {
@@ -619,6 +650,21 @@ struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
 	return eb;
 }
 
+static void
+trim_extent_buffer_cache(struct extent_io_tree *tree)
+{
+	struct extent_buffer *eb, *tmp;
+	u64 count = 0;
+	list_for_each_entry_safe(eb, tmp, &tree->lru, lru) {
+		if (eb->refs == 0) {
+			free_extent_buffer_final(eb);
+			count++;
+		}
+		if (tree->cache_size <= ((tree->max_cache_size * 9) / 10))
+			break;
+	}
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 					  u64 bytenr, u32 blocksize)
 {
@@ -649,6 +695,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 		}
 		list_add_tail(&eb->lru, &tree->lru);
 		tree->cache_size += blocksize;
+		if (tree->cache_size >= tree->max_cache_size)
+			trim_extent_buffer_cache(tree);
 	}
 	return eb;
 }
diff --git a/extent_io.h b/extent_io.h
index e617489..17a4a82 100644
--- a/extent_io.h
+++ b/extent_io.h
@@ -75,6 +75,7 @@ struct extent_io_tree {
 	struct cache_tree cache;
 	struct list_head lru;
 	u64 cache_size;
+	u64 max_cache_size;
 };
 
 struct extent_state {
@@ -106,6 +107,8 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree);
+void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
+				   u64 max_cache_size);
 void extent_io_tree_cleanup(struct extent_io_tree *tree);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
@@ -146,6 +149,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 					  u64 bytenr, u32 blocksize);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
 void free_extent_buffer(struct extent_buffer *eb);
+void free_extent_buffer_nocache(struct extent_buffer *eb);
 int read_extent_from_disk(struct extent_buffer *eb,
 			  unsigned long offset, unsigned long len);
 int write_extent_to_disk(struct extent_buffer *eb);
diff --git a/utils.c b/utils.c
index d2489e7..c565e08 100644
--- a/utils.c
+++ b/utils.c
@@ -24,6 +24,7 @@
 #include <sys/mount.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/sysinfo.h>
 #include <uuid/uuid.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -2521,3 +2522,14 @@ u8 rand_u8(void)
 void btrfs_config_init(void)
 {
 }
+
+unsigned long total_memory(void)
+{
+        struct sysinfo  si;
+
+        if (sysinfo(&si) < 0) {
+                error("can't determine memory size");
+                return -1UL;
+        }
+        return (si.totalram >> 10) * si.mem_unit;       /* kilobytes */
+}
diff --git a/utils.h b/utils.h
index 24d0a20..6be0d6f 100644
--- a/utils.h
+++ b/utils.h
@@ -148,6 +148,9 @@ unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode);
 int string_is_numerical(const char *str);
 int prefixcmp(const char *str, const char *prefix);
 
+/* Returns total size of main memory in KiB units. -1ULL if error. */
+unsigned long total_memory(void);
+
 /*
  * Global program state, configurable by command line and available to
  * functions without extra context passing.
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/7] btrfs-progs: backref: push state tracking into a helper structure
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
  2017-07-25 20:51 ` [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree jeffm
  2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-07-25 20:51 ` [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper jeffm
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

Eventually, we'll have several lists and trees, as well as some statistics.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 backref.c | 75 ++++++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/backref.c b/backref.c
index e1f41e1..ac1b506 100644
--- a/backref.c
+++ b/backref.c
@@ -130,6 +130,15 @@ struct __prelim_ref {
 	u64 wanted_disk_byte;
 };
 
+struct pref_state {
+	struct list_head pending;
+};
+
+static void init_pref_state(struct pref_state *prefstate)
+{
+	INIT_LIST_HEAD(&prefstate->pending);
+}
+
 /*
  * the rules for all callers of this function are:
  * - obtaining the parent is the goal
@@ -169,11 +178,12 @@ struct __prelim_ref {
  * block might help in merging entries to gain some speed.
  */
 
-static int __add_prelim_ref(struct list_head *head, u64 root_id,
+static int __add_prelim_ref(struct pref_state *prefstate, u64 root_id,
 			    struct btrfs_key *key, int level,
 			    u64 parent, u64 wanted_disk_byte, int count,
 			    gfp_t gfp_mask)
 {
+	struct list_head *head = &prefstate->pending;
 	struct __prelim_ref *ref;
 
 	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
@@ -345,10 +355,11 @@ out:
  * resolve all indirect backrefs from the list
  */
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
+				   struct pref_state *prefstate,
 				   struct btrfs_path *path, u64 time_seq,
-				   struct list_head *head,
 				   const u64 *extent_item_pos, u64 total_refs)
 {
+	struct list_head *head = &prefstate->pending;
 	int err;
 	int ret = 0;
 	struct __prelim_ref *ref;
@@ -436,8 +447,9 @@ static inline int ref_for_same_block(struct __prelim_ref *ref1,
  * read tree blocks and add keys where required.
  */
 static int __add_missing_keys(struct btrfs_fs_info *fs_info,
-			      struct list_head *head)
+			      struct pref_state *prefstate)
 {
+	struct list_head *head = &prefstate->pending;
 	struct list_head *pos;
 	struct extent_buffer *eb;
 
@@ -475,8 +487,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
  *           having a parent).
  * mode = 2: merge identical parents
  */
-static void __merge_refs(struct list_head *head, int mode)
+static void __merge_refs(struct pref_state *prefstate, int mode)
 {
+	struct list_head *head = &prefstate->pending;
 	struct list_head *pos1;
 
 	list_for_each(pos1, head) {
@@ -527,9 +540,9 @@ static void __merge_refs(struct list_head *head, int mode)
  * add all inline backrefs for bytenr to the list
  */
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
+			     struct pref_state *prefstate,
 			     struct btrfs_path *path, u64 bytenr,
-			     int *info_level, struct list_head *prefs,
-			     u64 *total_refs)
+			     int *info_level, u64 *total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -541,7 +554,6 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 	struct btrfs_extent_item *ei;
 	u64 flags;
 	u64 item_size;
-
 	/*
 	 * enumerate all inline refs
 	 */
@@ -584,7 +596,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 		switch (type) {
 		case BTRFS_SHARED_BLOCK_REF_KEY:
-			ret = __add_prelim_ref(prefs, 0, NULL,
+			ret = __add_prelim_ref(prefstate, 0, NULL,
 						*info_level + 1, offset,
 						bytenr, 1, GFP_NOFS);
 			break;
@@ -594,12 +606,12 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
 			count = btrfs_shared_data_ref_count(leaf, sdref);
-			ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
+			ret = __add_prelim_ref(prefstate, 0, NULL, 0, offset,
 					       bytenr, count, GFP_NOFS);
 			break;
 		}
 		case BTRFS_TREE_BLOCK_REF_KEY:
-			ret = __add_prelim_ref(prefs, offset, NULL,
+			ret = __add_prelim_ref(prefstate, offset, NULL,
 					       *info_level + 1, 0,
 					       bytenr, 1, GFP_NOFS);
 			break;
@@ -615,7 +627,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			key.type = BTRFS_EXTENT_DATA_KEY;
 			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
 			root = btrfs_extent_data_ref_root(leaf, dref);
-			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+			ret = __add_prelim_ref(prefstate, root, &key, 0, 0,
 					       bytenr, count, GFP_NOFS);
 			break;
 		}
@@ -634,8 +646,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
  * add all non-inline backrefs for bytenr to the list
  */
 static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
+			    struct pref_state *prefstate,
 			    struct btrfs_path *path, u64 bytenr,
-			    int info_level, struct list_head *prefs)
+			    int info_level)
 {
 	struct btrfs_root *extent_root = fs_info->extent_root;
 	int ret;
@@ -665,7 +678,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 
 		switch (key.type) {
 		case BTRFS_SHARED_BLOCK_REF_KEY:
-			ret = __add_prelim_ref(prefs, 0, NULL,
+			ret = __add_prelim_ref(prefstate, 0, NULL,
 						info_level + 1, key.offset,
 						bytenr, 1, GFP_NOFS);
 			break;
@@ -676,12 +689,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 			sdref = btrfs_item_ptr(leaf, slot,
 					      struct btrfs_shared_data_ref);
 			count = btrfs_shared_data_ref_count(leaf, sdref);
-			ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
+			ret = __add_prelim_ref(prefstate, 0, NULL, 0, key.offset,
 						bytenr, count, GFP_NOFS);
 			break;
 		}
 		case BTRFS_TREE_BLOCK_REF_KEY:
-			ret = __add_prelim_ref(prefs, key.offset, NULL,
+			ret = __add_prelim_ref(prefstate, key.offset, NULL,
 					       info_level + 1, 0,
 					       bytenr, 1, GFP_NOFS);
 			break;
@@ -698,7 +711,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
 			key.type = BTRFS_EXTENT_DATA_KEY;
 			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
 			root = btrfs_extent_data_ref_root(leaf, dref);
-			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
+			ret = __add_prelim_ref(prefstate, root, &key, 0, 0,
 					       bytenr, count, GFP_NOFS);
 			break;
 		}
@@ -730,12 +743,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	int info_level = 0;
 	int ret;
-	struct list_head prefs;
+	struct pref_state prefstate;
 	struct __prelim_ref *ref;
 	struct extent_inode_elem *eie = NULL;
 	u64 total_refs = 0;
 
-	INIT_LIST_HEAD(&prefs);
+	init_pref_state(&prefstate);
 
 	key.objectid = bytenr;
 	key.offset = (u64)-1;
@@ -764,34 +777,35 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 		if (key.objectid == bytenr &&
 		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		     key.type == BTRFS_METADATA_ITEM_KEY)) {
-			ret = __add_inline_refs(fs_info, path, bytenr,
-						&info_level, &prefs,
+			ret = __add_inline_refs(fs_info, &prefstate, path,
+						bytenr, &info_level,
 						&total_refs);
 			if (ret)
 				goto out;
-			ret = __add_keyed_refs(fs_info, path, bytenr,
-					       info_level, &prefs);
+			ret = __add_keyed_refs(fs_info, &prefstate, path,
+					       bytenr, info_level);
 			if (ret)
 				goto out;
 		}
 	}
 	btrfs_release_path(path);
 
-	ret = __add_missing_keys(fs_info, &prefs);
+	ret = __add_missing_keys(fs_info, &prefstate);
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 1);
+	__merge_refs(&prefstate, 1);
 
-	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
+	ret = __resolve_indirect_refs(fs_info, &prefstate, path, time_seq,
 				      extent_item_pos, total_refs);
 	if (ret)
 		goto out;
 
-	__merge_refs(&prefs, 2);
+	__merge_refs(&prefstate, 2);
 
-	while (!list_empty(&prefs)) {
-		ref = list_first_entry(&prefs, struct __prelim_ref, list);
+	while (!list_empty(&prefstate.pending)) {
+		ref = list_first_entry(&prefstate.pending,
+				       struct __prelim_ref, list);
 		WARN_ON(ref->count < 0);
 		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
@@ -842,8 +856,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 
 out:
 	btrfs_free_path(path);
-	while (!list_empty(&prefs)) {
-		ref = list_first_entry(&prefs, struct __prelim_ref, list);
+	while (!list_empty(&prefstate.pending)) {
+		ref = list_first_entry(&prefstate.pending,
+				       struct __prelim_ref, list);
 		list_del(&ref->list);
 		kfree(ref);
 	}
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
                   ` (2 preceding siblings ...)
  2017-07-25 20:51 ` [PATCH 4/7] btrfs-progs: backref: push state tracking into a helper structure jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-07-26  7:08   ` Nikolay Borisov
  2017-07-25 20:51 ` [PATCH 6/7] btrfs-progs: backref: use separate list for missing keys jeffm
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

---
 backref.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/backref.c b/backref.c
index ac1b506..be3376a 100644
--- a/backref.c
+++ b/backref.c
@@ -130,6 +130,11 @@ struct __prelim_ref {
 	u64 wanted_disk_byte;
 };
 
+static struct __prelim_ref *list_first_pref(struct list_head *head)
+{
+	return list_first_entry(head, struct __prelim_ref, list);
+}
+
 struct pref_state {
 	struct list_head pending;
 };
@@ -804,8 +809,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	__merge_refs(&prefstate, 2);
 
 	while (!list_empty(&prefstate.pending)) {
-		ref = list_first_entry(&prefstate.pending,
-				       struct __prelim_ref, list);
+		ref = list_first_pref(&prefstate.pending);
 		WARN_ON(ref->count < 0);
 		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
@@ -857,8 +861,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 out:
 	btrfs_free_path(path);
 	while (!list_empty(&prefstate.pending)) {
-		ref = list_first_entry(&prefstate.pending,
-				       struct __prelim_ref, list);
+		ref = list_first_pref(&prefstate.pending);
 		list_del(&ref->list);
 		kfree(ref);
 	}
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/7] btrfs-progs: backref: use separate list for missing keys
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
                   ` (3 preceding siblings ...)
  2017-07-25 20:51 ` [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-07-25 20:51 ` [PATCH 7/7] btrfs-progs: backref: use separate list for indirect refs jeffm
  2017-09-29 17:21 ` [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree David Sterba
  6 siblings, 0 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

Rather than iterate over all outstanding backrefs to resolve missing keys,
use a separate list that only contains refs that need missing keys resolved.

Once the missing key is resolved, move the ref to the pending list.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 backref.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/backref.c b/backref.c
index be3376a..1d670ee 100644
--- a/backref.c
+++ b/backref.c
@@ -137,11 +137,13 @@ static struct __prelim_ref *list_first_pref(struct list_head *head)
 
 struct pref_state {
 	struct list_head pending;
+	struct list_head pending_missing_keys;
 };
 
 static void init_pref_state(struct pref_state *prefstate)
 {
 	INIT_LIST_HEAD(&prefstate->pending);
+	INIT_LIST_HEAD(&prefstate->pending_missing_keys);
 }
 
 /*
@@ -188,7 +190,7 @@ static int __add_prelim_ref(struct pref_state *prefstate, u64 root_id,
 			    u64 parent, u64 wanted_disk_byte, int count,
 			    gfp_t gfp_mask)
 {
-	struct list_head *head = &prefstate->pending;
+	struct list_head *head;
 	struct __prelim_ref *ref;
 
 	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
@@ -199,16 +201,20 @@ static int __add_prelim_ref(struct pref_state *prefstate, u64 root_id,
 		return -ENOMEM;
 
 	ref->root_id = root_id;
-	if (key)
+	if (key) {
 		ref->key_for_search = *key;
-	else
+		head = &prefstate->pending;
+	} else {
 		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
+		head = &prefstate->pending_missing_keys;
+	}
 
 	ref->inode_list = NULL;
 	ref->level = level;
 	ref->count = count;
 	ref->parent = parent;
 	ref->wanted_disk_byte = wanted_disk_byte;
+
 	list_add_tail(&ref->list, head);
 
 	return 0;
@@ -454,18 +460,15 @@ static inline int ref_for_same_block(struct __prelim_ref *ref1,
 static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 			      struct pref_state *prefstate)
 {
-	struct list_head *head = &prefstate->pending;
-	struct list_head *pos;
 	struct extent_buffer *eb;
 
-	list_for_each(pos, head) {
+	while(!list_empty(&prefstate->pending_missing_keys)) {
 		struct __prelim_ref *ref;
-		ref = list_entry(pos, struct __prelim_ref, list);
+		ref = list_first_pref(&prefstate->pending_missing_keys);
 
-		if (ref->parent)
-			continue;
-		if (ref->key_for_search.type)
-			continue;
+		ASSERT(ref->root_id);
+		ASSERT(!ref->parent);
+		ASSERT(ref->key_for_search.type);
 		BUG_ON(!ref->wanted_disk_byte);
 		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
 				     fs_info->tree_root->nodesize, 0);
@@ -478,6 +481,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 		else
 			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
 		free_extent_buffer(eb);
+		list_move(&ref->list, &prefstate->pending);
 	}
 	return 0;
 }
@@ -808,6 +812,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 
 	__merge_refs(&prefstate, 2);
 
+	BUG_ON(!list_empty(&prefstate.pending_missing_keys));
+
 	while (!list_empty(&prefstate.pending)) {
 		ref = list_first_pref(&prefstate.pending);
 		WARN_ON(ref->count < 0);
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 7/7] btrfs-progs: backref: use separate list for indirect refs
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
                   ` (4 preceding siblings ...)
  2017-07-25 20:51 ` [PATCH 6/7] btrfs-progs: backref: use separate list for missing keys jeffm
@ 2017-07-25 20:51 ` jeffm
  2017-09-29 17:21 ` [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree David Sterba
  6 siblings, 0 replies; 14+ messages in thread
From: jeffm @ 2017-07-25 20:51 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Jeff Mahoney

From: Jeff Mahoney <jeffm@suse.com>

Rather than iterate over all outstanding backrefs to resolve indirect refs,
use a separate list that only contains indirect refs.

When we process missing keys, the ref moves to the indirect ref list.
Once the indirect ref is resolved, move the ref to the pending list.

Eventually these lists will be replaced by rbtrees.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
 backref.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/backref.c b/backref.c
index 1d670ee..2f262cf 100644
--- a/backref.c
+++ b/backref.c
@@ -138,12 +138,14 @@ static struct __prelim_ref *list_first_pref(struct list_head *head)
 struct pref_state {
 	struct list_head pending;
 	struct list_head pending_missing_keys;
+	struct list_head pending_indirect_refs;
 };
 
 static void init_pref_state(struct pref_state *prefstate)
 {
 	INIT_LIST_HEAD(&prefstate->pending);
 	INIT_LIST_HEAD(&prefstate->pending_missing_keys);
+	INIT_LIST_HEAD(&prefstate->pending_indirect_refs);
 }
 
 /*
@@ -370,11 +372,10 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 				   struct btrfs_path *path, u64 time_seq,
 				   const u64 *extent_item_pos, u64 total_refs)
 {
-	struct list_head *head = &prefstate->pending;
+	struct list_head *head = &prefstate->pending_indirect_refs;
 	int err;
 	int ret = 0;
 	struct __prelim_ref *ref;
-	struct __prelim_ref *ref_safe;
 	struct __prelim_ref *new_ref;
 	struct ulist *parents;
 	struct ulist_node *node;
@@ -384,16 +385,11 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 	if (!parents)
 		return -ENOMEM;
 
-	/*
-	 * _safe allows us to insert directly after the current item without
-	 * iterating over the newly inserted items.
-	 * we're also allowed to re-assign ref during iteration.
-	 */
-	list_for_each_entry_safe(ref, ref_safe, head, list) {
-		if (ref->parent)	/* already direct */
-			continue;
-		if (ref->count == 0)
-			continue;
+	while (!list_empty(head)) {
+		ref = list_first_pref(head);
+		list_move(&ref->list, &prefstate->pending);
+		ASSERT(!ref->parent);	/* already direct */
+		ASSERT(ref->count);
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
 					     parents, extent_item_pos,
 					     total_refs);
@@ -426,7 +422,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			new_ref->parent = node->val;
 			new_ref->inode_list = (struct extent_inode_elem *)
 							(uintptr_t)node->aux;
-			list_add(&new_ref->list, &ref->list);
+			list_add_tail(&new_ref->list, &prefstate->pending);
 		}
 		ulist_reinit(parents);
 	}
@@ -813,6 +809,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	__merge_refs(&prefstate, 2);
 
 	BUG_ON(!list_empty(&prefstate.pending_missing_keys));
+	BUG_ON(!list_empty(&prefstate.pending_indirect_refs));
 
 	while (!list_empty(&prefstate.pending)) {
 		ref = list_first_pref(&prefstate.pending);
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers
  2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
@ 2017-07-26  7:00   ` Nikolay Borisov
  2017-07-26 13:21     ` Jeff Mahoney
  2017-08-22 15:44   ` David Sterba
  1 sibling, 1 reply; 14+ messages in thread
From: Nikolay Borisov @ 2017-07-26  7:00 UTC (permalink / raw)
  To: jeffm, linux-btrfs



On 25.07.2017 23:51, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> We have the infrastructure to cache extent buffers but we don't actually
> do the caching.  As soon as the last reference is dropped, the buffer
> is dropped.  This patch keeps the extent buffers around until the max
> cache size is reached (defaults to 25% of memory) and then it drops
> the last 10% of the LRU to free up cache space for reallocation.  The
> cache size is configurable (for use by e.g. lowmem) when the cache is
> initialized.
> 
> Signed-off-by: Jeff Mahoney <jeffm@suse.com>
> ---
>  extent_io.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------
>  extent_io.h |  4 ++++
>  utils.c     | 12 ++++++++++
>  utils.h     |  3 +++
>  4 files changed, 80 insertions(+), 13 deletions(-)
> 
> diff --git a/extent_io.c b/extent_io.c
> index 915c6ed..937ff90 100644
> --- a/extent_io.c
> +++ b/extent_io.c
> @@ -27,6 +27,7 @@
>  #include "list.h"
>  #include "ctree.h"
>  #include "volumes.h"
> +#include "utils.h"
>  #include "internal.h"
>  
>  void extent_io_tree_init(struct extent_io_tree *tree)
> @@ -35,6 +36,14 @@ void extent_io_tree_init(struct extent_io_tree *tree)
>  	cache_tree_init(&tree->cache);
>  	INIT_LIST_HEAD(&tree->lru);
>  	tree->cache_size = 0;
> +	tree->max_cache_size = (u64)(total_memory() * 1024) / 4;
> +}
> +
> +void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
> +				   u64 max_cache_size)
> +{
> +	extent_io_tree_init(tree);
> +	tree->max_cache_size = max_cache_size;
>  }
>  
>  static struct extent_state *alloc_extent_state(void)
> @@ -67,16 +76,20 @@ static void free_extent_state_func(struct cache_extent *cache)
>  	btrfs_free_extent_state(es);
>  }
>  
> +static void free_extent_buffer_final(struct extent_buffer *eb);
>  void extent_io_tree_cleanup(struct extent_io_tree *tree)
>  {
>  	struct extent_buffer *eb;
>  
>  	while(!list_empty(&tree->lru)) {
>  		eb = list_entry(tree->lru.next, struct extent_buffer, lru);
> -		fprintf(stderr, "extent buffer leak: "
> -			"start %llu len %u\n",
> -			(unsigned long long)eb->start, eb->len);
> -		free_extent_buffer(eb);
> +		if (eb->refs) {
> +			fprintf(stderr, "extent buffer leak: "
> +				"start %llu len %u\n",
> +				(unsigned long long)eb->start, eb->len);
> +			free_extent_buffer_nocache(eb);
> +		} else
> +			free_extent_buffer_final(eb);
>  	}
>  
>  	cache_tree_free_extents(&tree->state, free_extent_state_func);
> @@ -567,7 +580,21 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
>  	return new;
>  }
>  
> -void free_extent_buffer(struct extent_buffer *eb)
> +static void free_extent_buffer_final(struct extent_buffer *eb)
> +{
> +	struct extent_io_tree *tree = eb->tree;
> +
> +	BUG_ON(eb->refs);
> +	BUG_ON(tree->cache_size < eb->len);
> +	list_del_init(&eb->lru);
> +	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
> +		remove_cache_extent(&tree->cache, &eb->cache_node);
> +		tree->cache_size -= eb->len;
> +	}
> +	free(eb);
> +}
> +
> +static void free_extent_buffer_internal(struct extent_buffer *eb, int free_now)

nit: free_ow -> boolean

>  {
>  	if (!eb || IS_ERR(eb))
>  		return;
> @@ -575,19 +602,23 @@ void free_extent_buffer(struct extent_buffer *eb)
>  	eb->refs--;
>  	BUG_ON(eb->refs < 0);
>  	if (eb->refs == 0) {
> -		struct extent_io_tree *tree = eb->tree;
>  		BUG_ON(eb->flags & EXTENT_DIRTY);
> -		list_del_init(&eb->lru);
>  		list_del_init(&eb->recow);
> -		if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
> -			BUG_ON(tree->cache_size < eb->len);
> -			remove_cache_extent(&tree->cache, &eb->cache_node);
> -			tree->cache_size -= eb->len;
> -		}
> -		free(eb);
> +		if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
> +			free_extent_buffer_final(eb);
>  	}
>  }
>  
> +void free_extent_buffer(struct extent_buffer *eb)
> +{
> +	free_extent_buffer_internal(eb, 0);
> +}
> +
> +void free_extent_buffer_nocache(struct extent_buffer *eb)
> +{
> +	free_extent_buffer_internal(eb, 1);
> +}
> +
>  struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
>  					 u64 bytenr, u32 blocksize)
>  {
> @@ -619,6 +650,21 @@ struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
>  	return eb;
>  }
>  
> +static void
> +trim_extent_buffer_cache(struct extent_io_tree *tree)
> +{
> +	struct extent_buffer *eb, *tmp;
> +	u64 count = 0;

count seems to be a leftover from something, so you could remove it

> +	list_for_each_entry_safe(eb, tmp, &tree->lru, lru) {
> +		if (eb->refs == 0) {
> +			free_extent_buffer_final(eb);
> +			count++;
> +		}
> +		if (tree->cache_size <= ((tree->max_cache_size * 9) / 10))
> +			break;
> +	}
> +}
> +
>  struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
>  					  u64 bytenr, u32 blocksize)
>  {
> @@ -649,6 +695,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
>  		}
>  		list_add_tail(&eb->lru, &tree->lru);
>  		tree->cache_size += blocksize;
> +		if (tree->cache_size >= tree->max_cache_size)
> +			trim_extent_buffer_cache(tree);
>  	}
>  	return eb;
>  }
> diff --git a/extent_io.h b/extent_io.h
> index e617489..17a4a82 100644
> --- a/extent_io.h
> +++ b/extent_io.h
> @@ -75,6 +75,7 @@ struct extent_io_tree {
>  	struct cache_tree cache;
>  	struct list_head lru;
>  	u64 cache_size;
> +	u64 max_cache_size;
>  };
>  
>  struct extent_state {
> @@ -106,6 +107,8 @@ static inline void extent_buffer_get(struct extent_buffer *eb)
>  }
>  
>  void extent_io_tree_init(struct extent_io_tree *tree);
> +void extent_io_tree_init_cache_max(struct extent_io_tree *tree,
> +				   u64 max_cache_size);
>  void extent_io_tree_cleanup(struct extent_io_tree *tree);
>  int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
>  int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
> @@ -146,6 +149,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
>  					  u64 bytenr, u32 blocksize);
>  struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
>  void free_extent_buffer(struct extent_buffer *eb);
> +void free_extent_buffer_nocache(struct extent_buffer *eb);
>  int read_extent_from_disk(struct extent_buffer *eb,
>  			  unsigned long offset, unsigned long len);
>  int write_extent_to_disk(struct extent_buffer *eb);
> diff --git a/utils.c b/utils.c
> index d2489e7..c565e08 100644
> --- a/utils.c
> +++ b/utils.c
> @@ -24,6 +24,7 @@
>  #include <sys/mount.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
> +#include <sys/sysinfo.h>
>  #include <uuid/uuid.h>
>  #include <fcntl.h>
>  #include <unistd.h>
> @@ -2521,3 +2522,14 @@ u8 rand_u8(void)
>  void btrfs_config_init(void)
>  {
>  }
> +
> +unsigned long total_memory(void)

perhaps rename to total_memory_bytes and return the memory size in
bytes. Returning them in kilobytes seems rather arbitrary. That way
you'd save the constant *1024 to turn the kbs in bytes in the callers
(currently only in extent_io_tree_init())

> +{
> +        struct sysinfo  si;
> +
> +        if (sysinfo(&si) < 0) {
> +                error("can't determine memory size");
> +                return -1UL;
> +        }
> +        return (si.totalram >> 10) * si.mem_unit;       /* kilobytes */
> +}
> diff --git a/utils.h b/utils.h
> index 24d0a20..6be0d6f 100644
> --- a/utils.h
> +++ b/utils.h
> @@ -148,6 +148,9 @@ unsigned int get_unit_mode_from_arg(int *argc, char *argv[], int df_mode);
>  int string_is_numerical(const char *str);
>  int prefixcmp(const char *str, const char *prefix);
>  
> +/* Returns total size of main memory in KiB units. -1ULL if error. */
> +unsigned long total_memory(void);
> +
>  /*
>   * Global program state, configurable by command line and available to
>   * functions without extra context passing.
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper
  2017-07-25 20:51 ` [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper jeffm
@ 2017-07-26  7:08   ` Nikolay Borisov
  2017-07-26 13:22     ` Jeff Mahoney
  0 siblings, 1 reply; 14+ messages in thread
From: Nikolay Borisov @ 2017-07-26  7:08 UTC (permalink / raw)
  To: jeffm, linux-btrfs



On 25.07.2017 23:51, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> ---
>  backref.c | 11 +++++++----
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/backref.c b/backref.c
> index ac1b506..be3376a 100644
> --- a/backref.c
> +++ b/backref.c
> @@ -130,6 +130,11 @@ struct __prelim_ref {
>  	u64 wanted_disk_byte;
>  };
>  
> +static struct __prelim_ref *list_first_pref(struct list_head *head)
> +{
> +	return list_first_entry(head, struct __prelim_ref, list);
> +}
> +

I think this just adds one more level of abstraction with no real
benefit whatsoever. Why not drop the patch entirely.

>  struct pref_state {
>  	struct list_head pending;
>  };
> @@ -804,8 +809,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
>  	__merge_refs(&prefstate, 2);
>  
>  	while (!list_empty(&prefstate.pending)) {
> -		ref = list_first_entry(&prefstate.pending,
> -				       struct __prelim_ref, list);
> +		ref = list_first_pref(&prefstate.pending);
>  		WARN_ON(ref->count < 0);
>  		if (roots && ref->count && ref->root_id && ref->parent == 0) {
>  			/* no parent == root of tree */
> @@ -857,8 +861,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
>  out:
>  	btrfs_free_path(path);
>  	while (!list_empty(&prefstate.pending)) {
> -		ref = list_first_entry(&prefstate.pending,
> -				       struct __prelim_ref, list);
> +		ref = list_first_pref(&prefstate.pending);
>  		list_del(&ref->list);
>  		kfree(ref);
>  	}
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers
  2017-07-26  7:00   ` Nikolay Borisov
@ 2017-07-26 13:21     ` Jeff Mahoney
  0 siblings, 0 replies; 14+ messages in thread
From: Jeff Mahoney @ 2017-07-26 13:21 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 2304 bytes --]

On 7/26/17 3:00 AM, Nikolay Borisov wrote:
> 
> 
> On 25.07.2017 23:51, jeffm@suse.com wrote:
>> From: Jeff Mahoney <jeffm@suse.com>
>>
>> We have the infrastructure to cache extent buffers but we don't actually
>> do the caching.  As soon as the last reference is dropped, the buffer
>> is dropped.  This patch keeps the extent buffers around until the max
>> cache size is reached (defaults to 25% of memory) and then it drops
>> the last 10% of the LRU to free up cache space for reallocation.  The
>> cache size is configurable (for use by e.g. lowmem) when the cache is
>> initialized.
>>
>> Signed-off-by: Jeff Mahoney <jeffm@suse.com>

>> @@ -567,7 +580,21 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
>>  	return new;
>>  }
>>  
>> -void free_extent_buffer(struct extent_buffer *eb)
>> +static void free_extent_buffer_final(struct extent_buffer *eb)
>> +{
>> +	struct extent_io_tree *tree = eb->tree;
>> +
>> +	BUG_ON(eb->refs);
>> +	BUG_ON(tree->cache_size < eb->len);
>> +	list_del_init(&eb->lru);
>> +	if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
>> +		remove_cache_extent(&tree->cache, &eb->cache_node);
>> +		tree->cache_size -= eb->len;
>> +	}
>> +	free(eb);
>> +}
>> +
>> +static void free_extent_buffer_internal(struct extent_buffer *eb, int free_now)
> 
> nit: free_ow -> boolean

Ack.  There should be a bunch of int -> bool conversions elsewhere too.

>> @@ -619,6 +650,21 @@ struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
>>  	return eb;
>>  }
>>  
>> +static void
>> +trim_extent_buffer_cache(struct extent_io_tree *tree)
>> +{
>> +	struct extent_buffer *eb, *tmp;
>> +	u64 count = 0;
> 
> count seems to be a leftover from something, so you could remove it

Yep, that was during debugging.  Removed.

>> @@ -2521,3 +2522,14 @@ u8 rand_u8(void)
>>  void btrfs_config_init(void)
>>  {
>>  }
>> +
>> +unsigned long total_memory(void)
> 
> perhaps rename to total_memory_bytes and return the memory size in
> bytes. Returning them in kilobytes seems rather arbitrary. That way
> you'd save the constant *1024 to turn the kbs in bytes in the callers
> (currently only in extent_io_tree_init())
> 

Ack.

Thanks,

-Jeff


-- 
Jeff Mahoney
SUSE Labs


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper
  2017-07-26  7:08   ` Nikolay Borisov
@ 2017-07-26 13:22     ` Jeff Mahoney
  2017-07-26 13:25       ` Jeff Mahoney
  0 siblings, 1 reply; 14+ messages in thread
From: Jeff Mahoney @ 2017-07-26 13:22 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 869 bytes --]

On 7/26/17 3:08 AM, Nikolay Borisov wrote:
> 
> 
> On 25.07.2017 23:51, jeffm@suse.com wrote:
>> From: Jeff Mahoney <jeffm@suse.com>
>>
>> ---
>>  backref.c | 11 +++++++----
>>  1 file changed, 7 insertions(+), 4 deletions(-)
>>
>> diff --git a/backref.c b/backref.c
>> index ac1b506..be3376a 100644
>> --- a/backref.c
>> +++ b/backref.c
>> @@ -130,6 +130,11 @@ struct __prelim_ref {
>>  	u64 wanted_disk_byte;
>>  };
>>  
>> +static struct __prelim_ref *list_first_pref(struct list_head *head)
>> +{
>> +	return list_first_entry(head, struct __prelim_ref, list);
>> +}
>> +
> 
> I think this just adds one more level of abstraction with no real
> benefit whatsoever. Why not drop the patch entirely.

Ack.  I thought it might be more readable but it ends up taking the same
number of characters.

-Jeff

-- 
Jeff Mahoney
SUSE Labs


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper
  2017-07-26 13:22     ` Jeff Mahoney
@ 2017-07-26 13:25       ` Jeff Mahoney
  0 siblings, 0 replies; 14+ messages in thread
From: Jeff Mahoney @ 2017-07-26 13:25 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs


[-- Attachment #1.1: Type: text/plain, Size: 1293 bytes --]

On 7/26/17 9:22 AM, Jeff Mahoney wrote:
> On 7/26/17 3:08 AM, Nikolay Borisov wrote:
>>
>>
>> On 25.07.2017 23:51, jeffm@suse.com wrote:
>>> From: Jeff Mahoney <jeffm@suse.com>
>>>
>>> ---
>>>  backref.c | 11 +++++++----
>>>  1 file changed, 7 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/backref.c b/backref.c
>>> index ac1b506..be3376a 100644
>>> --- a/backref.c
>>> +++ b/backref.c
>>> @@ -130,6 +130,11 @@ struct __prelim_ref {
>>>  	u64 wanted_disk_byte;
>>>  };
>>>  
>>> +static struct __prelim_ref *list_first_pref(struct list_head *head)
>>> +{
>>> +	return list_first_entry(head, struct __prelim_ref, list);
>>> +}
>>> +
>>
>> I think this just adds one more level of abstraction with no real
>> benefit whatsoever. Why not drop the patch entirely.
> 
> Ack.  I thought it might be more readable but it ends up taking the same
> number of characters.

Actually, no, it doesn't.  That's only true if using 'head' as the list head
as in the helper.

It ends up being

	ref = list_first_pref(&prefstate->pending_missing_keys);
vs
	ref = list_first_entry(&prefstate->pending_missing_keys,
                               struct __prelim_ref, list);

and I have to say I prefer reading the former.

-Jeff

-- 
Jeff Mahoney
SUSE Labs


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers
  2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
  2017-07-26  7:00   ` Nikolay Borisov
@ 2017-08-22 15:44   ` David Sterba
  1 sibling, 0 replies; 14+ messages in thread
From: David Sterba @ 2017-08-22 15:44 UTC (permalink / raw)
  To: jeffm; +Cc: linux-btrfs

On Tue, Jul 25, 2017 at 04:51:34PM -0400, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> We have the infrastructure to cache extent buffers but we don't actually
> do the caching.  As soon as the last reference is dropped, the buffer
> is dropped.  This patch keeps the extent buffers around until the max
> cache size is reached (defaults to 25% of memory) and then it drops
> the last 10% of the LRU to free up cache space for reallocation.  The
> cache size is configurable (for use by e.g. lowmem) when the cache is
> initialized.
> 
> Signed-off-by: Jeff Mahoney <jeffm@suse.com>

I've started to merge the series, changed code according to the review.
In this patch, test-mkfs and test-check fail (segfaults and assertions).
A debugging build or asan (make D=all,asan) does not reproduce the
errors so this will need to be found by other means.

I also fixed some trivial coding style issues, so the changes are now in
the branch
https://github.com/kdave/btrfs-progs/tree/ext/jeffm/extent-cache

Please use this as a starting point, I'm fine with resending just this
patch or an incremental.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree
  2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
                   ` (5 preceding siblings ...)
  2017-07-25 20:51 ` [PATCH 7/7] btrfs-progs: backref: use separate list for indirect refs jeffm
@ 2017-09-29 17:21 ` David Sterba
  6 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2017-09-29 17:21 UTC (permalink / raw)
  To: jeffm; +Cc: linux-btrfs

On Tue, Jul 25, 2017 at 04:51:32PM -0400, jeffm@suse.com wrote:
> From: Jeff Mahoney <jeffm@suse.com>
> 
> For the pathlogical case, like xfstests generic/297 that creates a
> large file consisting of one, repeating reflinked extent, fsck can
> take hours.  The root cause is that calling find_data_backref while
> iterating the extent records is an O(n^2) algorithm.  For my
> example test run, n was 2*2^20 and fsck was at 8 hours and counting.
> 
> This patch supplements the list with an rbtree and drops the runtime
> of that testcase to about 20 seconds.
> 
> A previous version of this patch introduced a regression that would
> have corrupted file systems during repair.  It was traced to the
> compare algorithm honoring ->bytes regardless of whether the
> reference had been found and a failure to reinsert nodes after
> the target reference was found.
> 
> Signed-off-by: Jeff Mahoney <jeffm@suse.com>

Josef has fixed the crash so I've added the patchset to devel.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-09-29 17:23 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-25 20:51 [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree jeffm
2017-07-25 20:51 ` [PATCH 2/7] btrfs-progs: check: switch to iterating over the backref_tree jeffm
2017-07-25 20:51 ` [PATCH 3/7] btrfs-progs: extent-cache: actually cache extent buffers jeffm
2017-07-26  7:00   ` Nikolay Borisov
2017-07-26 13:21     ` Jeff Mahoney
2017-08-22 15:44   ` David Sterba
2017-07-25 20:51 ` [PATCH 4/7] btrfs-progs: backref: push state tracking into a helper structure jeffm
2017-07-25 20:51 ` [PATCH 5/7] btrfs-progs: backref: add list_first_pref helper jeffm
2017-07-26  7:08   ` Nikolay Borisov
2017-07-26 13:22     ` Jeff Mahoney
2017-07-26 13:25       ` Jeff Mahoney
2017-07-25 20:51 ` [PATCH 6/7] btrfs-progs: backref: use separate list for missing keys jeffm
2017-07-25 20:51 ` [PATCH 7/7] btrfs-progs: backref: use separate list for indirect refs jeffm
2017-09-29 17:21 ` [PATCH 1/7] btrfs-progs: check: supplement extent backref list with rbtree David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.