All of lore.kernel.org
 help / color / mirror / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, David Sterba <dsterba@suse.com>
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	Hannes Reinecke <hare@suse.com>,
	linux-fsdevel@vger.kernel.org,
	Naohiro Aota <naohiro.aota@wdc.com>
Subject: [PATCH v7 36/39] btrfs: extend zoned allocator to use dedicated tree-log block group
Date: Fri, 11 Sep 2020 21:32:56 +0900	[thread overview]
Message-ID: <20200911123259.3782926-37-naohiro.aota@wdc.com> (raw)
In-Reply-To: <20200911123259.3782926-1-naohiro.aota@wdc.com>

This is the 1/3 patch to enable tree log on ZONED mode.

The tree-log feature does not work on ZONED mode as is. Blocks for a
tree-log tree are allocated mixed with other metadata blocks, and btrfs
writes and syncs the tree-log blocks to devices at the time of fsync(),
which is different timing from a global transaction commit. As a result,
both writing tree-log blocks and writing other metadata blocks become
non-sequential writes that ZONED mode must avoid.

We can introduce a dedicated block group for tree-log blocks so that
tree-log blocks and other metadata blocks can be separated write streams.
As a result, each write stream can now be written to devices separately.
"fs_info->treelog_bg" tracks the dedicated block group and btrfs assign
"treelog_bg" on-demand on tree-log block allocation time.

This commit extends the zoned block allocator to use the block group.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.c |  7 +++++
 fs/btrfs/ctree.h       |  2 ++
 fs/btrfs/extent-tree.c | 68 +++++++++++++++++++++++++++++++++++++-----
 3 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index be5394c8ec3a..d30eba3c484a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -939,6 +939,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	btrfs_return_cluster_to_free_space(block_group, cluster);
 	spin_unlock(&cluster->refill_lock);
 
+	if (btrfs_fs_incompat(fs_info, ZONED)) {
+		spin_lock(&fs_info->treelog_bg_lock);
+		if (fs_info->treelog_bg == block_group->start)
+			fs_info->treelog_bg = 0;
+		spin_unlock(&fs_info->treelog_bg_lock);
+	}
+
 	path = btrfs_alloc_path();
 	if (!path) {
 		ret = -ENOMEM;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e08fe341cd81..6e05eb180a77 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -942,6 +942,8 @@ struct btrfs_fs_info {
 	int send_in_progress;
 
 	struct mutex zoned_meta_io_lock;
+	spinlock_t treelog_bg_lock;
+	u64 treelog_bg;
 
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 	spinlock_t ref_verify_lock;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 79ac8fcc5c35..9e576977f416 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3506,6 +3506,9 @@ struct find_free_extent_ctl {
 
 	/* Allocation policy */
 	enum btrfs_extent_allocation_policy policy;
+
+	/* Allocation is called for tree-log */
+	bool for_treelog;
 };
 
 
@@ -3706,23 +3709,54 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 			       struct find_free_extent_ctl *ffe_ctl,
 			       struct btrfs_block_group **bg_ret)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_space_info *space_info = block_group->space_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	u64 start = block_group->start;
 	u64 num_bytes = ffe_ctl->num_bytes;
 	u64 avail;
+	u64 bytenr = block_group->start;
+	u64 log_bytenr;
 	int ret = 0;
+	bool skip;
 
 	ASSERT(btrfs_fs_incompat(block_group->fs_info, ZONED));
 
+	/*
+	 * Do not allow non-tree-log blocks in the dedicated tree-log block
+	 * group, and vice versa.
+	 */
+	spin_lock(&fs_info->treelog_bg_lock);
+	log_bytenr = fs_info->treelog_bg;
+	skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
+			      (!ffe_ctl->for_treelog && bytenr == log_bytenr));
+	spin_unlock(&fs_info->treelog_bg_lock);
+	if (skip)
+		return 1;
+
 	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
+	spin_lock(&fs_info->treelog_bg_lock);
+
+	ASSERT(!ffe_ctl->for_treelog ||
+	       block_group->start == fs_info->treelog_bg ||
+	       fs_info->treelog_bg == 0);
 
 	if (block_group->ro) {
 		ret = 1;
 		goto out;
 	}
 
+	/*
+	 * Do not allow currently using block group to be tree-log dedicated
+	 * block group.
+	 */
+	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
+	    (block_group->used || block_group->reserved)) {
+		ret = 1;
+		goto out;
+	}
+
 	avail = block_group->length - block_group->alloc_offset;
 	if (avail < num_bytes) {
 		ffe_ctl->max_extent_size = avail;
@@ -3730,6 +3764,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 		goto out;
 	}
 
+	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
+		fs_info->treelog_bg = block_group->start;
+
 	ffe_ctl->found_offset = start + block_group->alloc_offset;
 	block_group->alloc_offset += num_bytes;
 	spin_lock(&ctl->tree_lock);
@@ -3737,10 +3774,13 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	spin_unlock(&ctl->tree_lock);
 
 	ASSERT(IS_ALIGNED(ffe_ctl->found_offset,
-			  block_group->fs_info->stripesize));
+			  fs_info->stripesize));
 	ffe_ctl->search_start = ffe_ctl->found_offset;
 
 out:
+	if (ret && ffe_ctl->for_treelog)
+		fs_info->treelog_bg = 0;
+	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 	return ret;
@@ -3990,7 +4030,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 		return prepare_allocation_clustered(fs_info, ffe_ctl,
 						    space_info, ins);
 	case BTRFS_EXTENT_ALLOC_ZONED:
-		/* nothing to do */
+		if (ffe_ctl->for_treelog) {
+			spin_lock(&fs_info->treelog_bg_lock);
+			if (fs_info->treelog_bg)
+				ffe_ctl->hint_byte = fs_info->treelog_bg;
+			spin_unlock(&fs_info->treelog_bg_lock);
+		}
 		return 0;
 	default:
 		BUG();
@@ -4025,7 +4070,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 				u64 ram_bytes, u64 num_bytes, u64 empty_size,
 				u64 hint_byte_orig, struct btrfs_key *ins,
-				u64 flags, int delalloc)
+				u64 flags, int delalloc, bool for_treelog)
 {
 	int ret = 0;
 	int cache_block_group_error = 0;
@@ -4046,6 +4091,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 	ffe_ctl.orig_have_caching_bg = false;
 	ffe_ctl.found_offset = 0;
 	ffe_ctl.hint_byte = hint_byte_orig;
+	ffe_ctl.for_treelog = for_treelog;
 	ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
 
 	/* For clustered allocation */
@@ -4120,8 +4166,15 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 		struct btrfs_block_group *bg_ret;
 
 		/* If the block group is read-only, we can skip it entirely. */
-		if (unlikely(block_group->ro))
+		if (unlikely(block_group->ro)) {
+			if (btrfs_fs_incompat(fs_info, ZONED) && for_treelog) {
+				spin_lock(&fs_info->treelog_bg_lock);
+				if (block_group->start == fs_info->treelog_bg)
+					fs_info->treelog_bg = 0;
+				spin_unlock(&fs_info->treelog_bg_lock);
+			}
 			continue;
+		}
 
 		btrfs_grab_block_group(block_group, delalloc);
 		ffe_ctl.search_start = block_group->start;
@@ -4309,12 +4362,13 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 	bool final_tried = num_bytes == min_alloc_size;
 	u64 flags;
 	int ret;
+	bool for_treelog = root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID;
 
 	flags = get_alloc_profile_by_root(root, is_data);
 again:
 	WARN_ON(num_bytes < fs_info->sectorsize);
 	ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
-			       hint_byte, ins, flags, delalloc);
+			       hint_byte, ins, flags, delalloc, for_treelog);
 	if (!ret && !is_data) {
 		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
 	} else if (ret == -ENOSPC) {
@@ -4332,8 +4386,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 
 			sinfo = btrfs_find_space_info(fs_info, flags);
 			btrfs_err(fs_info,
-				  "allocation failed flags %llu, wanted %llu",
-				  flags, num_bytes);
+			"allocation failed flags %llu, wanted %llu treelog %d",
+				  flags, num_bytes, for_treelog);
 			if (sinfo)
 				btrfs_dump_space_info(fs_info, sinfo,
 						      num_bytes, 1);
-- 
2.27.0


  parent reply	other threads:[~2020-09-11 17:41 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-11 12:32 [PATCH v7 00/39] btrfs: zoned block device support Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 01/39] btrfs: introduce ZONED feature flag Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 02/39] btrfs: Get zone information of zoned block devices Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 03/39] btrfs: Check and enable ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 04/39] btrfs: introduce max_zone_append_size Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 05/39] btrfs: disallow space_cache in ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 06/39] btrfs: disallow NODATACOW " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 07/39] btrfs: disable fallocate " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 08/39] btrfs: disallow mixed-bg " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 09/39] btrfs: disallow inode_cache " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 10/39] btrfs: implement log-structured superblock for " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 11/39] btrfs: implement zoned chunk allocator Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 12/39] btrfs: verify device extent is aligned to zone Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 13/39] btrfs: load zone's alloction offset Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 14/39] btrfs: emulate write pointer for conventional zones Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 15/39] btrfs: track unusable bytes for zones Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 16/39] btrfs: do sequential extent allocation in ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 17/39] btrfs: reset zones of unused block groups Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 18/39] btrfs: redirty released extent buffers in ZONED mode Naohiro Aota
2020-09-14 11:48   ` Johannes Thumshirn
2020-09-11 12:32 ` [PATCH v7 19/39] btrfs: limit bio size under max_zone_append_size Naohiro Aota
2020-09-11 14:17   ` Christoph Hellwig
2020-09-12  4:14     ` Naohiro Aota
2020-09-12  5:30       ` Christoph Hellwig
2020-09-17  5:32         ` Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 20/39] btrfs: limit ordered extent size to max_zone_append_size Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 21/39] btrfs: extend btrfs_rmap_block for specifying a device Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 22/39] btrfs: use ZONE_APPEND write for ZONED btrfs Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 23/39] btrfs: handle REQ_OP_ZONE_APPEND as writing Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 24/39] btrfs: enable zone append writing for direct IO Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 25/39] btrfs: introduce dedicated data write path for ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 26/39] btrfs: serialize meta IOs on " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 27/39] btrfs: wait existing extents before truncating Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 28/39] btrfs: avoid async metadata checksum on ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 29/39] btrfs: mark block groups to copy for device-replace Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 30/39] btrfs: implement cloning for ZONED device-replace Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 31/39] btrfs: implement copying " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 32/39] btrfs: support dev-replace in ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 33/39] btrfs: enable relocation " Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 34/39] btrfs: relocate block group to repair IO failure in ZONED Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 35/39] btrfs: split alloc_log_tree() Naohiro Aota
2020-09-11 12:32 ` Naohiro Aota [this message]
2020-09-11 12:32 ` [PATCH v7 37/39] btrfs: serialize log transaction on ZONED mode Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 38/39] btrfs: reorder log node allocation Naohiro Aota
2020-09-11 12:32 ` [PATCH v7 39/39] btrfs: enable to mount ZONED incompat flag Naohiro Aota
2020-09-15  8:09 ` [PATCH v7 00/39] btrfs: zoned block device support David Sterba
2020-09-16 17:42   ` Johannes Thumshirn
2020-09-16 19:46     ` David Sterba
2020-09-16 19:50       ` Johannes Thumshirn
2020-09-17  5:40     ` Naohiro Aota
2020-09-17  7:14       ` Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200911123259.3782926-37-naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=clm@fb.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.