All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mark Fasheh <mfasheh@suse.de>
To: linux-btrfs@vger.kernel.org
Cc: David Sterba <dsterba@suse.cz>,
	Qu Wenruo <quwenruo@cn.fujitsu.com>,
	Mark Fasheh <mfasheh@suse.de>
Subject: [PATCH 2/2] btrfs-progs: btrfsck: verify qgroups above level 0
Date: Wed, 15 Jun 2016 15:50:02 -0700	[thread overview]
Message-ID: <1466031002-14533-3-git-send-email-mfasheh@suse.de> (raw)
In-Reply-To: <1466031002-14533-1-git-send-email-mfasheh@suse.de>

At the moment we only check subvolume quota groups (level 0). With this
patch we can check groups above 0, thus verifying the entire qgroup
hierarchy on a file system.  The accounting portion of this patch is modeled
after the kernel - we are essentially reimplementing the 'quota rescan' case
here. Most other sections of this code went unchanged, in particular the
root counting works independently of the accounting.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
---
 qgroup-verify.c | 305 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 251 insertions(+), 54 deletions(-)

diff --git a/qgroup-verify.c b/qgroup-verify.c
index 7b78504..4d2ea66 100644
--- a/qgroup-verify.c
+++ b/qgroup-verify.c
@@ -35,7 +35,8 @@
 /*#define QGROUP_VERIFY_DEBUG*/
 static unsigned long tot_extents_scanned = 0;
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive);
+struct qgroup_count;
+static struct qgroup_count *find_count(u64 qgroupid);
 
 struct qgroup_info {
 	u64 referenced;
@@ -54,6 +55,14 @@ struct qgroup_count {
 	struct qgroup_info info;
 
 	struct rb_node rb_node;
+
+	struct list_head groups;  /* parents when we are a child group */
+	struct list_head members; /* children when we are a parent
+				     group (not currently used but
+				     maintained to mirror kernel
+				     handling of qgroups) */
+
+	u64 cur_refcnt;
 };
 
 static struct counts_tree {
@@ -66,6 +75,39 @@ static struct counts_tree {
 static struct rb_root by_bytenr = RB_ROOT;
 
 /*
+ * glue structure to represent the relations between qgroups. Mirrored
+ * from kernel.
+ */
+struct btrfs_qgroup_list {
+	struct list_head next_group;
+	struct list_head next_member;
+	struct qgroup_count *group; /* Parent group */
+	struct qgroup_count *member;
+};
+
+/* allow us to reset ref counts during accounting without zeroing each group */
+static u64 qgroup_seq = 1ULL;
+
+static inline void update_cur_refcnt(struct qgroup_count *c)
+{
+	if (c->cur_refcnt < qgroup_seq)
+		c->cur_refcnt = qgroup_seq;
+	c->cur_refcnt += 1;
+}
+
+static inline u64 group_get_cur_refcnt(struct qgroup_count *c)
+{
+	if (c->cur_refcnt < qgroup_seq)
+		return 0;
+	return c->cur_refcnt - qgroup_seq;
+}
+
+static void inc_qgroup_seq(int root_count)
+{
+	qgroup_seq += root_count + 1;
+}
+
+/*
  * List of interior tree blocks. We walk this list after loading the
  * extent tree to resolve implied refs. For each interior node we'll
  * place a shared ref in the ref tree against each child object. This
@@ -296,9 +338,10 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
 	}
 
 	do {
-		if (ref->root)
-			ulist_add(roots, ref->root, 0, 0);
-		else
+		if (ref->root) {
+			if (is_fstree(ref->root))
+				ulist_add(roots, ref->root, 0, 0);
+		} else
 			find_parent_roots(roots, ref->parent);
 
 		node = rb_next(node);
@@ -307,6 +350,116 @@ static void find_parent_roots(struct ulist *roots, u64 parent)
 	} while (node && ref->bytenr == parent);
 }
 
+static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes)
+{
+	int ret;
+	u64 id, nr_roots, nr_refs;
+	struct qgroup_count *count;
+	struct ulist *counts = ulist_alloc(0);
+	struct ulist *tmp = ulist_alloc(0);
+	struct ulist_iterator uiter;
+	struct ulist_iterator tmp_uiter;
+	struct ulist_node *unode;
+	struct ulist_node *tmp_unode;
+	struct btrfs_qgroup_list *glist;
+
+	if (!counts || !tmp) {
+		ulist_free(counts);
+		ulist_free(tmp);
+		return ENOMEM;
+	}
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(roots, &uiter))) {
+		BUG_ON(unode->val == 0ULL);
+
+		/*
+		 * For each root, find their corresponding tracking group and
+		 * add it to our qgroups list.
+		 */
+		count = find_count(unode->val);
+		if (!count)
+			continue;
+
+		BUG_ON(!is_fstree(unode->val));
+		ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0);
+		if (ret < 0)
+			goto out;
+
+		/*
+		 * Now we look for parents (and parents of
+		 * those...). Use a tmp ulist here to avoid re-walking
+		 * (and re-incrementing) our already added items on every
+		 * loop iteration.
+		 */
+		ulist_reinit(tmp);
+		ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0);
+		if (ret < 0)
+			goto out;
+
+		ULIST_ITER_INIT(&tmp_uiter);
+		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+			/* bump the refcount on a node every time we
+			 * see it. */
+			count = u64_to_ptr(tmp_unode->aux);
+			update_cur_refcnt(count);
+
+			list_for_each_entry(glist, &count->groups, next_group) {
+				struct qgroup_count *parent;
+				parent = glist->group;
+				id = parent->qgroupid;
+
+				BUG_ON(!count);
+
+				ret = ulist_add(counts, id, ptr_to_u64(parent),
+						0);
+				if (ret < 0)
+					goto out;
+				ret = ulist_add(tmp, id, ptr_to_u64(parent),
+						0);
+				if (ret < 0)
+					goto out;
+			}
+		}
+	}
+
+	/*
+	 * Now that we have gathered up and counted all the groups, we
+	 * can add bytes for this ref.
+	 */
+	nr_roots = roots->nnodes;
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(counts, &uiter))) {
+		count = u64_to_ptr(unode->aux);
+
+		nr_refs = group_get_cur_refcnt(count);
+		if (nr_refs) {
+			count->info.referenced += num_bytes;
+			count->info.referenced_compressed += num_bytes;
+
+			if (nr_refs == nr_roots) {
+				count->info.exclusive += num_bytes;
+				count->info.exclusive_compressed += num_bytes;
+			}
+		}
+#ifdef QGROUP_VERIFY_DEBUG
+		printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu,"
+		       " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes,
+		       btrfs_qgroup_level(count->qgroupid),
+		       btrfs_qgroup_subvid(count->qgroupid),
+		       count->info.referenced, count->info.exclusive, nr_refs,
+		       nr_roots);
+#endif
+	}
+
+	inc_qgroup_seq(roots->nnodes);
+	ret = 0;
+out:
+	ulist_free(counts);
+	ulist_free(tmp);
+	return ret;
+}
+
 static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
 			      struct ulist *roots);
 /*
@@ -318,18 +471,15 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes,
  * - resolve all possible roots for shared refs, insert each
  *   of those into ref_roots ulist (this is a recursive process)
  *
- * - Walk ref_roots ulist, adding extent bytes to each qgroup count that
- *    cooresponds to a found root.
+ * - With all roots resolved we can account the ref - this is done in
+ *   account_one_extent().
  */
 static void account_all_refs(int do_qgroups, u64 search_subvol)
 {
-	int exclusive;
 	struct ref *ref;
 	struct rb_node *node;
 	u64 bytenr, num_bytes;
 	struct ulist *roots = ulist_alloc(0);
-	struct ulist_iterator uiter;
-	struct ulist_node *unode;
 
 	node = rb_first(&by_bytenr);
 	while (node) {
@@ -347,10 +497,14 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
 		do {
 			BUG_ON(ref->bytenr != bytenr);
 			BUG_ON(ref->num_bytes != num_bytes);
-			if (ref->root)
-				ulist_add(roots, ref->root, 0, 0);
-			else
+			if (ref->root) {
+				if (is_fstree(ref->root)) {
+					if (ulist_add(roots, ref->root, 0, 0) < 0)
+						goto enomem;
+				}
+			} else {
 				find_parent_roots(roots, ref->parent);
+			}
 
 			/*
 			 * When we leave this inner loop, node is set
@@ -362,29 +516,23 @@ static void account_all_refs(int do_qgroups, u64 search_subvol)
 				ref = rb_entry(node, struct ref, bytenr_node);
 		} while (node && ref->bytenr == bytenr);
 
-		/*
-		 * Now that we have all roots, we can properly account
-		 * this extent against the corresponding qgroups.
-		 */
-		if (roots->nnodes == 1)
-			exclusive = 1;
-		else
-			exclusive = 0;
-
 		if (search_subvol)
 			print_subvol_info(search_subvol, bytenr, num_bytes,
 					  roots);
 
-		ULIST_ITER_INIT(&uiter);
-		while ((unode = ulist_next(roots, &uiter))) {
-			BUG_ON(unode->val == 0ULL);
-			/* We only want to account fs trees */
-			if (is_fstree(unode->val) && do_qgroups)
-				add_bytes(unode->val, num_bytes, exclusive);
-		}
+		if (!do_qgroups)
+			continue;
+
+		if (account_one_extent(roots, bytenr, num_bytes))
+			goto enomem;
 	}
 
 	ulist_free(roots);
+	return;
+enomem:
+	fprintf(stderr,
+		"ERROR: Out of memory while accounting refs for qgroups!\n");
+	abort();
 }
 
 static u64 resolve_one_root(u64 bytenr)
@@ -668,6 +816,8 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
 		item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk);
 		item->exclusive_compressed =
 			btrfs_qgroup_info_exclusive_compressed(leaf, disk);
+		INIT_LIST_HEAD(&c->groups);
+		INIT_LIST_HEAD(&c->members);
 
 		if (insert_count(c)) {
 			free(c);
@@ -677,29 +827,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key,
 	return c;
 }
 
-static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive)
+static int add_qgroup_relation(u64 memberid, u64 parentid)
 {
-	struct qgroup_count *count = find_count(root_objectid);
-	struct qgroup_info *qg;
+	struct qgroup_count *member;
+	struct qgroup_count *parent;
+	struct btrfs_qgroup_list *list;
 
-	BUG_ON(num_bytes < 4096); /* Random sanity check. */
+	if (memberid > parentid)
+		return 0;
 
-	if (!count)
-		return;
+	member = find_count(memberid);
+	parent = find_count(parentid);
+	if (!member || !parent)
+		return -ENOENT;
 
-	qg = &count->info;
+	list = calloc(1, sizeof(*list));
+	if (!list)
+		return -ENOMEM;
 
-	qg->referenced += num_bytes;
-	/*
-	 * count of compressed bytes is unimplemented, so we do the
-	 * same as kernel.
-	 */
-	qg->referenced_compressed += num_bytes;
+	list->group = parent;
+	list->member = member;
+	list_add_tail(&list->next_group, &member->groups);
+	list_add_tail(&list->next_member, &parent->members);
 
-	if (exclusive) {
-		qg->exclusive += num_bytes;
-		qg->exclusive_compressed += num_bytes;
-	}
+	return 0;
 }
 
 static void read_qgroup_status(struct btrfs_path *path,
@@ -728,11 +879,18 @@ static int load_quota_info(struct btrfs_fs_info *info)
 	struct btrfs_qgroup_info_item *item;
 	struct qgroup_count *count;
 	int i, nr;
+	int search_relations = 0;
 
+loop:
+	/*
+	 * Do 2 passes, the first allocates group counts and reads status
+	 * items. The 2nd pass picks up relation items and glues them
+	 * to their respective count structures.
+	 */
 	btrfs_init_path(&path);
 
 	key.offset = 0;
-	key.objectid = 0;
+	key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY;
 	key.type = 0;
 
 	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
@@ -749,17 +907,27 @@ static int load_quota_info(struct btrfs_fs_info *info)
 			btrfs_item_key(leaf, &disk_key, i);
 			btrfs_disk_key_to_cpu(&key, &disk_key);
 
+			if (search_relations) {
+				if (key.type == BTRFS_QGROUP_RELATION_KEY) {
+					ret = add_qgroup_relation(key.objectid,
+								  key.offset);
+					if (ret) {
+						fprintf(stderr,
+						"ERROR: out of memory\n");
+						goto out;
+					}
+				}
+				continue;
+			}
+
 			if (key.type == BTRFS_QGROUP_STATUS_KEY) {
 				read_qgroup_status(&path, &counts);
 				continue;
 			}
-			if (key.type == BTRFS_QGROUP_RELATION_KEY)
-				printf("Ignoring qgroup relation key %llu\n",
-				       key.objectid);
 
 			/*
-			 * Ignore: BTRFS_QGROUP_LIMIT_KEY,
-			 * 	   BTRFS_QGROUP_RELATION_KEY
+			 * At this point, we can ignore anything that
+			 * isn't a qgroup info.
 			 */
 			if (key.type != BTRFS_QGROUP_INFO_KEY)
 				continue;
@@ -791,6 +959,12 @@ static int load_quota_info(struct btrfs_fs_info *info)
 
 	ret = 0;
 	btrfs_release_path(&path);
+
+	if (!search_relations) {
+		search_relations = 1;
+		goto loop;
+	}
+
 out:
 	return ret;
 }
@@ -1035,6 +1209,11 @@ static void print_fields_signed(long long bytes,
 	       prefix, type, bytes, type, bytes_compressed);
 }
 
+static inline int qgroup_printable(struct qgroup_count *c)
+{
+	return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid));
+}
+
 static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 {
 	int is_different;
@@ -1045,9 +1224,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose)
 
 	is_different = excl_diff || ref_diff;
 
-	if (verbose || (is_different && count->subvol_exists)) {
-		printf("Counts for qgroup id: %llu %s\n",
-		       (unsigned long long)count->qgroupid,
+	if (verbose || (is_different && qgroup_printable(count))) {
+		printf("Counts for qgroup id: %llu/%llu %s\n",
+		       btrfs_qgroup_level(count->qgroupid),
+		       btrfs_qgroup_subvid(count->qgroupid),
 		       is_different ? "are different" : "");
 
 		print_fields(info->referenced, info->referenced_compressed,
@@ -1099,10 +1279,27 @@ void free_qgroup_counts(void)
 {
 	struct rb_node *node;
 	struct qgroup_count *c;
+	struct btrfs_qgroup_list *glist, *tmpglist;
+
 	node = rb_first(&counts.root);
 	while (node) {
 		c = rb_entry(node, struct qgroup_count, rb_node);
+
+		list_for_each_entry_safe(glist, tmpglist, &c->groups,
+					 next_group) {
+			list_del(&glist->next_group);
+			list_del(&glist->next_member);
+			free(glist);
+		}
+		list_for_each_entry_safe(glist, tmpglist, &c->members,
+					 next_group) {
+			list_del(&glist->next_group);
+			list_del(&glist->next_member);
+			free(glist);
+		}
+
 		node = rb_next(node);
+
 		rb_erase(&c->rb_node, &counts.root);
 		free(c);
 	}
-- 
2.1.4


  parent reply	other threads:[~2016-06-15 22:50 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-15 22:50 [PATCH 0/2] btrfs-progs: qgroup verification update Mark Fasheh
2016-06-15 22:50 ` [PATCH 1/2] btrfs-progs: free qgroup counts in btrfsck Mark Fasheh
2016-06-17 15:34   ` David Sterba
2016-06-15 22:50 ` Mark Fasheh [this message]
2016-06-17 15:34   ` [PATCH 2/2] btrfs-progs: btrfsck: verify qgroups above level 0 David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466031002-14533-3-git-send-email-mfasheh@suse.de \
    --to=mfasheh@suse.de \
    --cc=dsterba@suse.cz \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=quwenruo@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.