Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Dennis Zhou <dennis@kernel.org>
To: David Sterba <dsterba@suse.com>, Chris Mason <clm@fb.com>,
	Josef Bacik <josef@toxicpanda.com>,
	Omar Sandoval <osandov@osandov.com>
Cc: kernel-team@fb.com, linux-btrfs@vger.kernel.org,
	Dennis Zhou <dennis@kernel.org>
Subject: [PATCH 03/22] btrfs: keep track of which extents have been discarded
Date: Wed, 23 Oct 2019 18:52:57 -0400
Message-ID: <63a78fbbd4d742ab13484d0cdad2264173ca7411.1571865774.git.dennis@kernel.org> (raw)
In-Reply-To: <cover.1571865774.git.dennis@kernel.org>
In-Reply-To: <cover.1571865774.git.dennis@kernel.org>

Async discard will use the free space cache as backing knowledge for
which extents to discard. This patch plumbs knowledge about which
extents need to be discarded into the free space cache from
unpin_extent_range().

An untrimmed extent can merge with everything as this is a new region.
Absorbing trimmed extents is a tradeoff to for greater coalescing which
makes life better for find_free_extent(). Additionally, it seems the
size of a trim isn't as problematic as the trim io itself.

When reading in the free space cache from disk, if sync is set, mark all
extents as trimmed. The current code ensures at transaction commit that
all free space is trimmed when sync is set, so this reflects that.

Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 fs/btrfs/extent-tree.c      | 15 +++++++---
 fs/btrfs/free-space-cache.c | 60 ++++++++++++++++++++++++++++++++-----
 fs/btrfs/free-space-cache.h | 17 ++++++++++-
 fs/btrfs/inode-map.c        | 13 ++++----
 4 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 77a5904756c5..6a40bba3cb19 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2783,6 +2783,7 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
 
 static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 			      u64 start, u64 end,
+			      enum btrfs_trim_state trim_state,
 			      const bool return_free_space)
 {
 	struct btrfs_block_group_cache *cache = NULL;
@@ -2816,7 +2817,9 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		if (start < cache->last_byte_to_unpin) {
 			len = min(len, cache->last_byte_to_unpin - start);
 			if (return_free_space)
-				btrfs_add_free_space(cache, start, len);
+				__btrfs_add_free_space(fs_info,
+						       cache->free_space_ctl,
+						       start, len, trim_state);
 		}
 
 		start += len;
@@ -2894,6 +2897,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 
 	while (!trans->aborted) {
 		struct extent_state *cached_state = NULL;
+		enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
 
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
@@ -2903,12 +2907,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 			break;
 		}
 
-		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC)) {
 			ret = btrfs_discard_extent(fs_info, start,
 						   end + 1 - start, NULL);
+			trim_state = BTRFS_TRIM_STATE_TRIMMED;
+		}
 
 		clear_extent_dirty(unpin, start, end, &cached_state);
-		unpin_extent_range(fs_info, start, end, true);
+		unpin_extent_range(fs_info, start, end, trim_state, true);
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		free_extent_state(cached_state);
 		cond_resched();
@@ -5512,7 +5518,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
 				   u64 start, u64 end)
 {
-	return unpin_extent_range(fs_info, start, end, false);
+	return unpin_extent_range(fs_info, start, end,
+				  BTRFS_TRIM_STATE_UNTRIMMED, false);
 }
 
 /*
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d54dcd0ab230..d7f0cb961496 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -747,6 +747,14 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 			goto free_cache;
 		}
 
+		/*
+		 * Sync discard ensures that the free space cache is always
+		 * trimmed.  So when reading this in, the state should reflect
+		 * that.
+		 */
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
+			e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
+
 		if (!e->bytes) {
 			kmem_cache_free(btrfs_free_space_cachep, e);
 			goto free_cache;
@@ -2157,6 +2165,23 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
 	return ret;
 }
 
+/*
+ * Free space merging rules:
+ *  1) Merge trimmed areas together
+ *  2) Let untrimmed areas coalesce with trimmed areas
+ *  3) Always pull neighboring regions from bitmaps
+ *
+ * The above rules are for when we merge free space based on btrfs_trim_state.
+ * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
+ * same reason: to promote larger extent regions which makes life easier for
+ * find_free_extent().  Rule 2 enables coalescing based on the common path
+ * being returning free space from btrfs_finish_extent_commit().  So when free
+ * space is trimmed, it will prevent aggregating trimmed new region and
+ * untrimmed regions in the rb_tree.  Rule 3 is purely to obtain larger extents
+ * and provide find_free_extent() with the largest extents possible hoping for
+ * the reuse path.
+ *
+ */
 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 			  struct btrfs_free_space *info, bool update_stat)
 {
@@ -2165,6 +2190,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 	bool merged = false;
 	u64 offset = info->offset;
 	u64 bytes = info->bytes;
+	const bool is_trimmed = btrfs_free_space_trimmed(info);
 
 	/*
 	 * first we want to see if there is free space adjacent to the range we
@@ -2178,7 +2204,9 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 	else
 		left_info = tree_search_offset(ctl, offset - 1, 0, 0);
 
-	if (right_info && !right_info->bitmap) {
+	/* See try_merge_free_space() comment. */
+	if (right_info && !right_info->bitmap &&
+	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, right_info);
 		else
@@ -2188,8 +2216,10 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
 		merged = true;
 	}
 
+	/* See try_merge_free_space() comment. */
 	if (left_info && !left_info->bitmap &&
-	    left_info->offset + left_info->bytes == offset) {
+	    left_info->offset + left_info->bytes == offset &&
+	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
 		if (update_stat)
 			unlink_free_space(ctl, left_info);
 		else
@@ -2225,6 +2255,10 @@ static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
 	bytes = (j - i) * ctl->unit;
 	info->bytes += bytes;
 
+	/* See try_merge_free_space() comment. */
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, end, bytes);
 	else
@@ -2278,6 +2312,10 @@ static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
 	info->offset -= bytes;
 	info->bytes += bytes;
 
+	/* See try_merge_free_space() comment. */
+	if (!btrfs_free_space_trimmed(bitmap))
+		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
+
 	if (update_stat)
 		bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
 	else
@@ -2327,7 +2365,8 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
 
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 			   struct btrfs_free_space_ctl *ctl,
-			   u64 offset, u64 bytes)
+			   u64 offset, u64 bytes,
+			   enum btrfs_trim_state trim_state)
 {
 	struct btrfs_free_space *info;
 	int ret = 0;
@@ -2338,6 +2377,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 
 	info->offset = offset;
 	info->bytes = bytes;
+	info->trim_state = trim_state;
 	RB_CLEAR_NODE(&info->offset_index);
 
 	spin_lock(&ctl->tree_lock);
@@ -2385,7 +2425,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 {
 	return __btrfs_add_free_space(block_group->fs_info,
 				      block_group->free_space_ctl,
-				      bytenr, size);
+				      bytenr, size, 0);
 }
 
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
@@ -2460,8 +2500,11 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 			}
 			spin_unlock(&ctl->tree_lock);
 
-			ret = btrfs_add_free_space(block_group, offset + bytes,
-						   old_end - (offset + bytes));
+			ret = __btrfs_add_free_space(block_group->fs_info,
+						     ctl,
+						     offset + bytes,
+						     old_end - (offset + bytes),
+						     info->trim_state);
 			WARN_ON(ret);
 			goto out;
 		}
@@ -2630,6 +2673,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 	u64 ret = 0;
 	u64 align_gap = 0;
 	u64 align_gap_len = 0;
+	enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
 
 	spin_lock(&ctl->tree_lock);
 	entry = find_free_space(ctl, &offset, &bytes_search,
@@ -2646,6 +2690,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 		unlink_free_space(ctl, entry);
 		align_gap_len = offset - entry->offset;
 		align_gap = entry->offset;
+		align_gap_trim_state = entry->trim_state;
 
 		entry->offset = offset + bytes;
 		WARN_ON(entry->bytes < bytes + align_gap_len);
@@ -2661,7 +2706,8 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 
 	if (align_gap_len)
 		__btrfs_add_free_space(block_group->fs_info, ctl,
-				       align_gap, align_gap_len);
+				       align_gap, align_gap_len,
+				       align_gap_trim_state);
 	return ret;
 }
 
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 39c32c8fc24f..98a568dbd5e7 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -6,6 +6,14 @@
 #ifndef BTRFS_FREE_SPACE_CACHE_H
 #define BTRFS_FREE_SPACE_CACHE_H
 
+/*
+ * This is the trim state of an extent or bitmap.
+ */
+enum btrfs_trim_state {
+	BTRFS_TRIM_STATE_TRIMMED,
+	BTRFS_TRIM_STATE_UNTRIMMED,
+};
+
 struct btrfs_free_space {
 	struct rb_node offset_index;
 	u64 offset;
@@ -13,8 +21,14 @@ struct btrfs_free_space {
 	u64 max_extent_size;
 	unsigned long *bitmap;
 	struct list_head list;
+	enum btrfs_trim_state trim_state;
 };
 
+static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
+{
+	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMED);
+}
+
 struct btrfs_free_space_ctl {
 	spinlock_t tree_lock;
 	struct rb_root free_space_offset;
@@ -84,7 +98,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 			   struct btrfs_free_space_ctl *ctl,
-			   u64 bytenr, u64 size);
+			   u64 bytenr, u64 size,
+			   enum btrfs_trim_state trim_state);
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 			 u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 63cad7865d75..00e225de4fe6 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -107,7 +107,7 @@ static int caching_kthread(void *data)
 
 		if (last != (u64)-1 && last + 1 != key.objectid) {
 			__btrfs_add_free_space(fs_info, ctl, last + 1,
-					       key.objectid - last - 1);
+					       key.objectid - last - 1, 0);
 			wake_up(&root->ino_cache_wait);
 		}
 
@@ -118,7 +118,7 @@ static int caching_kthread(void *data)
 
 	if (last < root->highest_objectid - 1) {
 		__btrfs_add_free_space(fs_info, ctl, last + 1,
-				       root->highest_objectid - last - 1);
+				       root->highest_objectid - last - 1, 0);
 	}
 
 	spin_lock(&root->ino_cache_lock);
@@ -175,7 +175,8 @@ static void start_caching(struct btrfs_root *root)
 	ret = btrfs_find_free_objectid(root, &objectid);
 	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
 		__btrfs_add_free_space(fs_info, ctl, objectid,
-				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
+				       BTRFS_LAST_FREE_OBJECTID - objectid + 1,
+				       0);
 		wake_up(&root->ino_cache_wait);
 	}
 
@@ -221,7 +222,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 		return;
 again:
 	if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 	} else {
 		down_write(&fs_info->commit_root_sem);
 		spin_lock(&root->ino_cache_lock);
@@ -234,7 +235,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 
 		start_caching(root);
 
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 
 		up_write(&fs_info->commit_root_sem);
 	}
@@ -281,7 +282,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		spin_unlock(rbroot_lock);
 		if (count)
 			__btrfs_add_free_space(root->fs_info, ctl,
-					       info->offset, count);
+					       info->offset, count, 0);
 		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }
-- 
2.17.1


  parent reply index

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-23 22:52 [PATCH v2 00/22] btrfs: async discard support Dennis Zhou
2019-10-23 22:52 ` [PATCH 01/22] bitmap: genericize percpu bitmap region iterators Dennis Zhou
2019-10-23 22:52 ` [PATCH 02/22] btrfs: rename DISCARD opt to DISCARD_SYNC Dennis Zhou
2019-10-23 22:52 ` Dennis Zhou [this message]
2019-11-08 19:05   ` [PATCH 03/22] btrfs: keep track of which extents have been discarded Josef Bacik
2019-11-11 17:20     ` Dennis Zhou
2019-10-23 22:52 ` [PATCH 04/22] btrfs: keep track of cleanliness of the bitmap Dennis Zhou
2019-11-08 19:12   ` Josef Bacik
2019-10-23 22:52 ` [PATCH 05/22] btrfs: add the beginning of async discard, discard workqueue Dennis Zhou
2019-11-11 18:49   ` David Sterba
2019-10-23 22:53 ` [PATCH 06/22] btrfs: handle empty block_group removal Dennis Zhou
2019-11-08 19:19   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 07/22] btrfs: discard one region at a time in async discard Dennis Zhou
2019-11-08 19:27   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 08/22] btrfs: add removal calls for sysfs debug/ Dennis Zhou
2019-11-08 19:28   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 09/22] btrfs: make UUID/debug have its own kobject Dennis Zhou
2019-11-08 19:29   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 10/22] btrfs: add discard sysfs directory Dennis Zhou
2019-11-08 19:29   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 11/22] btrfs: track discardable extents for async discard Dennis Zhou
2019-10-23 22:53 ` [PATCH 12/22] btrfs: keep track of discardable_bytes Dennis Zhou
2019-11-08 19:31   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 13/22] btrfs: calculate discard delay based on number of extents Dennis Zhou
2019-10-23 22:53 ` [PATCH 14/22] btrfs: add bps discard rate limit Dennis Zhou
2019-11-08 19:41   ` Josef Bacik
2019-11-15 17:28     ` David Sterba
2019-10-23 22:53 ` [PATCH 15/22] btrfs: limit max discard size for async discard Dennis Zhou
2019-10-23 22:53 ` [PATCH 16/22] btrfs: make max async discard size tunable Dennis Zhou
2019-10-23 22:53 ` [PATCH 17/22] btrfs: have multiple discard lists Dennis Zhou
2019-11-08 19:44   ` Josef Bacik
2019-10-23 22:53 ` [PATCH 18/22] btrfs: only keep track of data extents for async discard Dennis Zhou
2019-11-08 19:46   ` Josef Bacik
2019-11-08 20:14     ` Dennis Zhou
2019-11-15 17:31       ` David Sterba
2019-10-23 22:53 ` [PATCH 19/22] btrfs: keep track of discard reuse stats Dennis Zhou
2019-10-23 22:53 ` [PATCH 20/22] btrfs: add async discard header Dennis Zhou
2019-10-23 22:53 ` [PATCH 21/22] btrfs: increase the metadata allowance for the free_space_cache Dennis Zhou
2019-10-23 22:53 ` [PATCH 22/22] btrfs: make smaller extents more likely to go into bitmaps Dennis Zhou
2019-11-15 17:40 ` [PATCH v2 00/22] btrfs: async discard support David Sterba
2019-11-20 21:50 [PATCH v3 " Dennis Zhou
2019-11-20 21:51 ` [PATCH 03/22] btrfs: keep track of which extents have been discarded Dennis Zhou
2019-11-25 19:46 [PATCH v4 00/22] btrfs: async discard support Dennis Zhou
2019-11-25 19:46 ` [PATCH 03/22] btrfs: keep track of which extents have been discarded Dennis Zhou

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=63a78fbbd4d742ab13484d0cdad2264173ca7411.1571865774.git.dennis@kernel.org \
    --to=dennis@kernel.org \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=osandov@osandov.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org
	public-inbox-index linux-btrfs

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git