All of lore.kernel.org
 help / color / mirror / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, dsterba@suse.com
Cc: hare@suse.com, linux-fsdevel@vger.kernel.org,
	Jens Axboe <axboe@kernel.dk>,
	Christoph Hellwig <hch@infradead.org>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Naohiro Aota <naohiro.aota@wdc.com>
Subject: [PATCH v10 31/41] btrfs: mark block groups to copy for device-replace
Date: Tue, 10 Nov 2020 20:26:34 +0900	[thread overview]
Message-ID: <b57f5fd21c034e4157295a930913ae2b7db64c5d.1605007037.git.naohiro.aota@wdc.com> (raw)
In-Reply-To: <cover.1605007036.git.naohiro.aota@wdc.com>

This is the 1/4 patch to support device-replace in ZONED mode.

We have two types of I/Os during the device-replace process. One is an I/O
to "copy" (by the scrub functions) all the device extents on the source
device to the destination device.  The other one is an I/O to "clone" (by
handle_ops_on_dev_replace()) new incoming write I/Os from users to the
source device into the target device.

Cloning incoming I/Os can break the sequential write rule in the target
device. When writing is mapped in the middle of a block group, the I/O is
directed in the middle of a target device zone, which breaks the sequential
write rule.

However, the cloning function cannot be merely disabled since incoming I/Os
targeting already copied device extents must be cloned so that the I/O is
executed on the target device.

We cannot use dev_replace->cursor_{left,right} to determine whether bio is
going to not yet copied region.  Since we have a time gap between finishing
btrfs_scrub_dev() and rewriting the mapping tree in
btrfs_dev_replace_finishing(), we can have a newly allocated device extent
which is never cloned nor copied.

So the point is to copy only already existing device extents. This patch
introduces mark_block_group_to_copy() to mark existing block groups as a
target of copying. Then, handle_ops_on_dev_replace() and dev-replace can
check the flag to do their job.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.h |   1 +
 fs/btrfs/dev-replace.c | 183 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/dev-replace.h |   3 +
 fs/btrfs/scrub.c       |  17 ++++
 4 files changed, 204 insertions(+)

diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 44f68e12f863..ccbcf37eae9c 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -95,6 +95,7 @@ struct btrfs_block_group {
 	unsigned int iref:1;
 	unsigned int has_caching_ctl:1;
 	unsigned int removed:1;
+	unsigned int to_copy:1;
 
 	int disk_cache_state;
 
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index db87f1aa604b..95e75fc8e266 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -22,6 +22,7 @@
 #include "dev-replace.h"
 #include "sysfs.h"
 #include "zoned.h"
+#include "block-group.h"
 
 /*
  * Device replace overview
@@ -437,6 +438,184 @@ static char* btrfs_dev_name(struct btrfs_device *device)
 		return rcu_str_deref(device->name);
 }
 
+static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
+				    struct btrfs_device *src_dev)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key found_key;
+	struct btrfs_root *root = fs_info->dev_root;
+	struct btrfs_dev_extent *dev_extent = NULL;
+	struct btrfs_block_group *cache;
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+	u64 chunk_offset, length;
+
+	/* Do not use "to_copy" on non-ZONED for now */
+	if (!btrfs_is_zoned(fs_info))
+		return 0;
+
+	mutex_lock(&fs_info->chunk_mutex);
+
+	/* Ensure we don't have pending new block group */
+	spin_lock(&fs_info->trans_lock);
+	while (fs_info->running_transaction &&
+	       !list_empty(&fs_info->running_transaction->dev_update_list)) {
+		spin_unlock(&fs_info->trans_lock);
+		mutex_unlock(&fs_info->chunk_mutex);
+		trans = btrfs_attach_transaction(root);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			mutex_lock(&fs_info->chunk_mutex);
+			if (ret == -ENOENT)
+				continue;
+			else
+				goto unlock;
+		}
+
+		ret = btrfs_commit_transaction(trans);
+		mutex_lock(&fs_info->chunk_mutex);
+		if (ret)
+			goto unlock;
+
+		spin_lock(&fs_info->trans_lock);
+	}
+	spin_unlock(&fs_info->trans_lock);
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	path->reada = READA_FORWARD;
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
+
+	key.objectid = src_dev->devid;
+	key.offset = 0;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		goto free_path;
+	if (ret > 0) {
+		if (path->slots[0] >=
+		    btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto free_path;
+			if (ret > 0) {
+				ret = 0;
+				goto free_path;
+			}
+		} else {
+			ret = 0;
+		}
+	}
+
+	while (1) {
+		struct extent_buffer *l = path->nodes[0];
+		int slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(l, &found_key, slot);
+
+		if (found_key.objectid != src_dev->devid)
+			break;
+
+		if (found_key.type != BTRFS_DEV_EXTENT_KEY)
+			break;
+
+		if (found_key.offset < key.offset)
+			break;
+
+		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+		length = btrfs_dev_extent_length(l, dev_extent);
+
+		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
+
+		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+		if (!cache)
+			goto skip;
+
+		spin_lock(&cache->lock);
+		cache->to_copy = 1;
+		spin_unlock(&cache->lock);
+
+		btrfs_put_block_group(cache);
+
+skip:
+		ret = btrfs_next_item(root, path);
+		if (ret != 0) {
+			if (ret > 0)
+				ret = 0;
+			break;
+		}
+	}
+
+free_path:
+	btrfs_free_path(path);
+unlock:
+	mutex_unlock(&fs_info->chunk_mutex);
+
+	return ret;
+}
+
+bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
+				      struct btrfs_block_group *cache,
+				      u64 physical)
+{
+	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 chunk_offset = cache->start;
+	int num_extents, cur_extent;
+	int i;
+
+	/* Do not use "to_copy" on non-ZONED for now */
+	if (!btrfs_is_zoned(fs_info))
+		return true;
+
+	spin_lock(&cache->lock);
+	if (cache->removed) {
+		spin_unlock(&cache->lock);
+		return true;
+	}
+	spin_unlock(&cache->lock);
+
+	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
+	ASSERT(!IS_ERR(em));
+	map = em->map_lookup;
+
+	num_extents = cur_extent = 0;
+	for (i = 0; i < map->num_stripes; i++) {
+		/* We have more device extent to copy */
+		if (srcdev != map->stripes[i].dev)
+			continue;
+
+		num_extents++;
+		if (physical == map->stripes[i].physical)
+			cur_extent = i;
+	}
+
+	free_extent_map(em);
+
+	if (num_extents > 1 && cur_extent < num_extents - 1) {
+		/*
+		 * Has more stripes on this device. Keep this BG
+		 * readonly until we finish all the stripes.
+		 */
+		return false;
+	}
+
+	/* Last stripe on this device */
+	spin_lock(&cache->lock);
+	cache->to_copy = 0;
+	spin_unlock(&cache->lock);
+
+	return true;
+}
+
 static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 		const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
 		int read_src)
@@ -478,6 +657,10 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 	if (ret)
 		return ret;
 
+	ret = mark_block_group_to_copy(fs_info, src_device);
+	if (ret)
+		return ret;
+
 	down_write(&dev_replace->rwsem);
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 60b70dacc299..3911049a5f23 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -18,5 +18,8 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
 void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
 int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
 int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
+bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
+				      struct btrfs_block_group *cache,
+				      u64 physical);
 
 #endif
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index aa1b36cf5c88..d0d7db3c8b0b 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3500,6 +3500,17 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		if (!cache)
 			goto skip;
 
+
+		if (sctx->is_dev_replace && btrfs_fs_incompat(fs_info, ZONED)) {
+			spin_lock(&cache->lock);
+			if (!cache->to_copy) {
+				spin_unlock(&cache->lock);
+				ro_set = 0;
+				goto done;
+			}
+			spin_unlock(&cache->lock);
+		}
+
 		/*
 		 * Make sure that while we are scrubbing the corresponding block
 		 * group doesn't get its logical address and its device extents
@@ -3631,6 +3642,12 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
 		scrub_pause_off(fs_info);
 
+		if (sctx->is_dev_replace &&
+		    !btrfs_finish_block_group_to_copy(dev_replace->srcdev,
+						      cache, found_key.offset))
+			ro_set = 0;
+
+done:
 		down_write(&dev_replace->rwsem);
 		dev_replace->cursor_left = dev_replace->cursor_right;
 		dev_replace->item_needs_writeback = 1;
-- 
2.27.0


  parent reply	other threads:[~2020-11-10 11:29 UTC|newest]

Thread overview: 125+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-10 11:26 [PATCH v10 00/41] btrfs: zoned block device support Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 01/41] block: add bio_add_zone_append_page Naohiro Aota
2020-11-10 17:20   ` Christoph Hellwig
2020-11-11  7:20     ` Johannes Thumshirn
2020-11-10 11:26 ` [PATCH v10 02/41] iomap: support REQ_OP_ZONE_APPEND Naohiro Aota
2020-11-10 17:25   ` Christoph Hellwig
2020-11-10 18:55   ` Darrick J. Wong
2020-11-10 19:01     ` Darrick J. Wong
2020-11-24 11:29     ` Christoph Hellwig
2020-11-30 18:11   ` Darrick J. Wong
2020-12-01 10:16     ` Johannes Thumshirn
2020-12-09  9:31   ` Christoph Hellwig
2020-12-09 10:08     ` Johannes Thumshirn
2020-12-09 10:10       ` hch
2020-12-09 10:16         ` Johannes Thumshirn
2020-12-09 13:38           ` Johannes Thumshirn
2020-12-11  7:26             ` Johannes Thumshirn
2020-12-11 21:24               ` Chaitanya Kulkarni
2020-12-12 10:22                 ` Johannes Thumshirn
2020-11-10 11:26 ` [PATCH v10 03/41] btrfs: introduce ZONED feature flag Naohiro Aota
2020-11-19 21:31   ` David Sterba
2020-11-10 11:26 ` [PATCH v10 04/41] btrfs: get zone information of zoned block devices Naohiro Aota
2020-11-12  6:57   ` Anand Jain
2020-11-12  7:35     ` Johannes Thumshirn
2020-11-12  7:44       ` Damien Le Moal
2020-11-12  9:44         ` Anand Jain
2020-11-13 21:34           ` David Sterba
2020-11-12  9:39     ` Johannes Thumshirn
2020-11-12 12:57     ` Naohiro Aota
2020-11-18 11:17       ` Anand Jain
2020-11-30 11:16         ` Anand Jain
2020-11-25 21:47   ` David Sterba
2020-11-25 22:07     ` David Sterba
2020-11-25 23:50     ` Damien Le Moal
2020-11-26 14:11       ` David Sterba
2020-11-25 22:16   ` David Sterba
2020-11-10 11:26 ` [PATCH v10 05/41] btrfs: check and enable ZONED mode Naohiro Aota
2020-11-18 11:29   ` Anand Jain
2020-11-27 18:44     ` David Sterba
2020-11-30 12:12       ` Anand Jain
2020-11-30 13:15         ` Damien Le Moal
2020-12-01  2:19           ` Anand Jain
2020-12-01  2:29             ` Damien Le Moal
2020-12-01  5:53               ` Anand Jain
2020-12-01  6:09                 ` Damien Le Moal
2020-12-01  7:12                   ` Anand Jain
2020-12-01 10:45               ` Graham Cobb
2020-12-01 11:03                 ` Damien Le Moal
2020-12-01 11:11                   ` hch
2020-12-01 11:27                     ` Damien Le Moal
2020-11-10 11:26 ` [PATCH v10 06/41] btrfs: introduce max_zone_append_size Naohiro Aota
2020-11-19  9:23   ` Anand Jain
2020-11-27 18:47     ` David Sterba
2020-11-10 11:26 ` [PATCH v10 07/41] btrfs: disallow space_cache in ZONED mode Naohiro Aota
2020-11-19 10:42   ` Anand Jain
2020-11-20  4:08     ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 08/41] btrfs: disallow NODATACOW " Naohiro Aota
2020-11-20  4:17   ` Anand Jain
2020-11-23 17:21     ` David Sterba
2020-11-24  3:29       ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 09/41] btrfs: disable fallocate " Naohiro Aota
2020-11-20  4:28   ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 10/41] btrfs: disallow mixed-bg " Naohiro Aota
2020-11-20  4:32   ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 11/41] btrfs: implement log-structured superblock for " Naohiro Aota
2020-11-11  1:34   ` kernel test robot
2020-11-11  2:43   ` kernel test robot
2020-11-23 17:46   ` David Sterba
2020-11-24  9:30     ` Johannes Thumshirn
2020-11-24  6:46   ` Anand Jain
2020-11-24  7:16     ` Hannes Reinecke
2020-11-10 11:26 ` [PATCH v10 12/41] btrfs: implement zoned chunk allocator Naohiro Aota
2020-11-24 11:36   ` Anand Jain
2020-11-25  1:57     ` Naohiro Aota
2020-11-25  7:17       ` Anand Jain
2020-11-25 11:48         ` Naohiro Aota
2020-11-25  9:59       ` Graham Cobb
2020-11-25 11:50         ` Naohiro Aota
2020-12-09  5:27   ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 13/41] btrfs: verify device extent is aligned to zone Naohiro Aota
2020-11-27  6:27   ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 14/41] btrfs: load zone's alloction offset Naohiro Aota
2020-12-08  9:54   ` Anand Jain
2020-11-10 11:26 ` [PATCH v10 15/41] btrfs: emulate write pointer for conventional zones Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 16/41] btrfs: track unusable bytes for zones Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 17/41] btrfs: do sequential extent allocation in ZONED mode Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 18/41] btrfs: reset zones of unused block groups Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 19/41] btrfs: redirty released extent buffers in ZONED mode Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 20/41] btrfs: extract page adding function Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 21/41] btrfs: use bio_add_zone_append_page for zoned btrfs Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 22/41] btrfs: handle REQ_OP_ZONE_APPEND as writing Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 23/41] btrfs: split ordered extent when bio is sent Naohiro Aota
2020-11-11  2:01   ` kernel test robot
2020-11-11  2:01     ` kernel test robot
2020-11-11  2:26   ` kernel test robot
2020-11-11  2:26     ` kernel test robot
2020-11-11  3:46   ` kernel test robot
2020-11-11  3:46     ` kernel test robot
2020-11-11  3:46   ` [RFC PATCH] btrfs: extract_ordered_extent() can be static kernel test robot
2020-11-11  3:46     ` kernel test robot
2020-11-11  4:12   ` [PATCH v10.1 23/41] btrfs: split ordered extent when bio is sent Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 24/41] btrfs: extend btrfs_rmap_block for specifying a device Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 25/41] btrfs: use ZONE_APPEND write for ZONED btrfs Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 26/41] btrfs: enable zone append writing for direct IO Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 27/41] btrfs: introduce dedicated data write path for ZONED mode Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 28/41] btrfs: serialize meta IOs on " Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 29/41] btrfs: wait existing extents before truncating Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 30/41] btrfs: avoid async metadata checksum on ZONED mode Naohiro Aota
2020-11-10 11:26 ` Naohiro Aota [this message]
2020-11-11  3:13   ` [PATCH v10 31/41] btrfs: mark block groups to copy for device-replace kernel test robot
2020-11-11  3:16   ` kernel test robot
2020-11-10 11:26 ` [PATCH v10 32/41] btrfs: implement cloning for ZONED device-replace Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 33/41] btrfs: implement copying " Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 34/41] btrfs: support dev-replace in ZONED mode Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 35/41] btrfs: enable relocation " Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 36/41] btrfs: relocate block group to repair IO failure in ZONED Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 37/41] btrfs: split alloc_log_tree() Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 38/41] btrfs: extend zoned allocator to use dedicated tree-log block group Naohiro Aota
2020-11-11  4:58   ` [PATCH v10.1 " Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 39/41] btrfs: serialize log transaction on ZONED mode Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 40/41] btrfs: reorder log node allocation Naohiro Aota
2020-11-10 11:26 ` [PATCH v10 41/41] btrfs: enable to mount ZONED incompat flag Naohiro Aota
2020-11-10 14:00 ` [PATCH v10 00/41] btrfs: zoned block device support Anand Jain
2020-11-11  5:07   ` Naohiro Aota
2020-11-27 19:28 ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b57f5fd21c034e4157295a930913ae2b7db64c5d.1605007037.git.naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=darrick.wong@oracle.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=hch@infradead.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.