All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <wqu@suse.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 05/12] btrfs: write-intent: write the newly created bitmaps to all disks
Date: Thu,  7 Jul 2022 13:32:30 +0800	[thread overview]
Message-ID: <f311ec8bb10f38f181959bdd926aab4835dbd3c7.1657171615.git.wqu@suse.com> (raw)
In-Reply-To: <cover.1657171615.git.wqu@suse.com>

The newly created bitmaps are written back even for RO mounts.

This ensures that filesystems with that compat RO flag set always have
write-intent bitmaps on disk.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/write-intent.c | 154 ++++++++++++++++++++++++++++++++++++++--
 fs/btrfs/write-intent.h |   6 ++
 2 files changed, 155 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/write-intent.c b/fs/btrfs/write-intent.c
index a7ed21182525..d1c5e8e206ba 100644
--- a/fs/btrfs/write-intent.c
+++ b/fs/btrfs/write-intent.c
@@ -1,8 +1,140 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <crypto/hash.h>
+#include <linux/bio.h>
 #include "ctree.h"
 #include "volumes.h"
 #include "write-intent.h"
+#include "rcu-string.h"
+
+struct bitmap_writeback_contrl {
+	atomic_t pending_bios;
+	atomic_t errors;
+	wait_queue_head_t wait;
+};
+
+static void write_intent_end_write(struct bio *bio)
+{
+	struct bitmap_writeback_contrl *wb_ctrl = bio->bi_private;
+
+	if (bio->bi_status)
+		atomic_inc(&wb_ctrl->errors);
+	atomic_dec(&wb_ctrl->pending_bios);
+	wake_up(&wb_ctrl->wait);
+
+	bio_put(bio);
+}
+
+static int submit_one_device(struct btrfs_device *dev,
+			     struct bitmap_writeback_contrl *wb_ctrl)
+{
+	struct btrfs_fs_info *fs_info = dev->fs_info;
+	struct write_intent_ctrl *ctrl = fs_info->wi_ctrl;
+	struct bio *bio;
+
+	if (!dev->bdev)
+		return -EIO;
+
+	if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
+	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
+		return 0;
+
+	atomic_inc(&wb_ctrl->pending_bios);
+	bio = bio_alloc(dev->bdev, 1, REQ_OP_WRITE | REQ_SYNC |
+			REQ_META | REQ_PRIO | REQ_FUA, GFP_NOFS);
+	bio->bi_iter.bi_sector = BTRFS_DEVICE_RANGE_RESERVED >> SECTOR_SHIFT;
+	bio->bi_private = wb_ctrl;
+	bio->bi_end_io = write_intent_end_write;
+	__bio_add_page(bio, ctrl->commit_page, WRITE_INTENT_BITMAPS_SIZE,
+			offset_in_page(BTRFS_DEVICE_RANGE_RESERVED));
+	submit_bio(bio);
+	return 0;
+}
+
+/* Write back the bitmaps page to all devices. */
+static int write_intent_writeback(struct btrfs_fs_info *fs_info)
+{
+	struct write_intent_ctrl *ctrl = fs_info->wi_ctrl;
+	struct write_intent_super *wis;
+	struct btrfs_device *dev;
+	struct btrfs_device **found_devs;
+	struct bitmap_writeback_contrl wb_ctrl = {0};
+	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+	const int nr_devs_max = fs_info->fs_devices->open_devices + 4;
+	int nr_devs = 0;
+	int total_errors = 0;
+	int ret;
+	int i;
+
+	ASSERT(ctrl);
+
+	found_devs = kcalloc(nr_devs_max, sizeof(struct btrfs_device *),
+			     GFP_NOFS);
+	if (!found_devs)
+		return -ENOMEM;
+
+	shash->tfm = fs_info->csum_shash;
+
+	spin_lock(&ctrl->lock);
+	wis = page_address(ctrl->page);
+
+	/*
+	 * Bump up the event counter each time this bitmap is going to be
+	 * written.
+	 */
+	wi_set_super_events(wis, wi_super_events(wis) + 1);
+	crypto_shash_digest(shash, (unsigned char *)wis + BTRFS_CSUM_SIZE,
+			    WRITE_INTENT_BITMAPS_SIZE - BTRFS_CSUM_SIZE,
+			    wis->csum);
+	atomic64_inc(&ctrl->event);
+	memcpy_page(ctrl->commit_page, 0, ctrl->page, 0,
+		    WRITE_INTENT_BITMAPS_SIZE);
+	spin_unlock(&ctrl->lock);
+
+	init_waitqueue_head(&wb_ctrl.wait);
+	atomic_set(&wb_ctrl.pending_bios, 0);
+	atomic_set(&wb_ctrl.errors, 0);
+
+	rcu_read_lock();
+	/*
+	 * Record all the writeable devices into found_devs[].
+	 *
+	 * We have to do this to keep a consistent view of writeable devices
+	 * without holding device_list_mutex.
+	 * Dev-replace/dev-removal both hold that mutex while waiting for
+	 * submitted bios to finish.
+	 * If we tried to hold device_list_mutex in the bio submission path,
+	 * we would deadlock with the above dev-replace/dev-removal.
+	 *
+	 * So here we just grab a local list of devices, and since we're at
+	 * bio submission path, the device will never disappear before the bio
+	 * finishes.
+	 */
+	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
+		found_devs[nr_devs] = dev;
+		nr_devs++;
+
+		if (unlikely(nr_devs >= nr_devs_max))
+			break;
+	}
+	rcu_read_unlock();
+
+	/* Go through all the recorded devices, and submit the commit_page. */
+	for (i = 0; i < nr_devs; i++) {
+		ret = submit_one_device(found_devs[i], &wb_ctrl);
+		if (ret < 0)
+			total_errors++;
+	}
+	wait_event(wb_ctrl.wait, atomic_read(&wb_ctrl.pending_bios) == 0);
+
+	if (total_errors + atomic_read(&wb_ctrl.errors) >
+	    btrfs_super_num_devices(fs_info->super_copy) - 1) {
+		btrfs_err(fs_info, "failed to writeback write-intent bitmaps");
+		ret = -EIO;
+	}
+	kfree(found_devs);
+	return ret;
+}
 
 /*
  * Return 0 if a valid write intent bitmap can be found.
@@ -53,10 +185,11 @@ static int write_intent_load(struct btrfs_device *device, struct page *dst)
 	return ret;
 }
 
-static void write_intent_init(struct btrfs_fs_info *fs_info)
+static int write_intent_init(struct btrfs_fs_info *fs_info)
 {
 	struct write_intent_ctrl *ctrl = fs_info->wi_ctrl;
 	struct write_intent_super *wis;
+	int ret;
 
 	ASSERT(ctrl);
 	ASSERT(ctrl->page);
@@ -75,7 +208,12 @@ static void write_intent_init(struct btrfs_fs_info *fs_info)
 	wi_set_super_size(wis, WRITE_INTENT_BITMAPS_SIZE);
 	wi_set_super_blocksize(wis, ctrl->blocksize);
 	wi_set_super_nr_entries(wis, 0);
-	btrfs_info(fs_info, "creating new write intent bitmaps");
+
+	ret = write_intent_writeback(fs_info);
+	if (ret < 0)
+		return ret;
+	btrfs_info(fs_info, "new write intent bitmaps created");
+	return 0;
 }
 
 int btrfs_write_intent_init(struct btrfs_fs_info *fs_info)
@@ -95,11 +233,14 @@ int btrfs_write_intent_init(struct btrfs_fs_info *fs_info)
 		return -ENOMEM;
 
 	fs_info->wi_ctrl->page = alloc_page(GFP_NOFS);
-	if (!fs_info->wi_ctrl->page) {
+	fs_info->wi_ctrl->commit_page = alloc_page(GFP_NOFS);
+	if (!fs_info->wi_ctrl->page || !fs_info->wi_ctrl->commit_page) {
 		ret = -ENOMEM;
 		goto cleanup;
 	}
 
+	spin_lock_init(&fs_info->wi_ctrl->lock);
+
 	/*
 	 * Go through every writeable device to find the highest event.
 	 *
@@ -149,12 +290,15 @@ int btrfs_write_intent_init(struct btrfs_fs_info *fs_info)
 	}
 
 	/* No valid bitmap found, create a new one. */
-	write_intent_init(fs_info);
-	return 0;
+	ret = write_intent_init(fs_info);
+
+	return ret;
 cleanup:
 	if (fs_info->wi_ctrl) {
 		if (fs_info->wi_ctrl->page)
 			__free_page(fs_info->wi_ctrl->page);
+		if (fs_info->wi_ctrl->commit_page)
+			__free_page(fs_info->wi_ctrl->commit_page);
 		kfree(fs_info->wi_ctrl);
 		fs_info->wi_ctrl = NULL;
 	}
diff --git a/fs/btrfs/write-intent.h b/fs/btrfs/write-intent.h
index 2c5cd434e978..797e57aef0e1 100644
--- a/fs/btrfs/write-intent.h
+++ b/fs/btrfs/write-intent.h
@@ -111,9 +111,15 @@ struct write_intent_ctrl {
 	/* For the write_intent super and entries. */
 	struct page *page;
 
+	/* A copy for writeback. */
+	struct page *commit_page;
+
 	/* Cached event counter.*/
 	atomic64_t event;
 
+	/* Lock for reading/writing above @page. */
+	spinlock_t lock;
+
 	/* Cached blocksize from write intent super. */
 	u32 blocksize;
 };
-- 
2.36.1


  parent reply	other threads:[~2022-07-07  5:33 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-07  5:32 [PATCH 00/12] btrfs: introduce write-intent bitmaps for RAID56 Qu Wenruo
2022-07-07  5:32 ` [PATCH 01/12] btrfs: introduce new compat RO flag, EXTRA_SUPER_RESERVED Qu Wenruo
2022-07-07  5:32 ` [PATCH 02/12] btrfs: introduce a new experimental compat RO flag, WRITE_INTENT_BITMAP Qu Wenruo
2022-07-07  5:32 ` [PATCH 03/12] btrfs: introduce the on-disk format of btrfs write intent bitmaps Qu Wenruo
2022-07-07  5:32 ` [PATCH 04/12] btrfs: load/create write-intent bitmaps at mount time Qu Wenruo
2022-07-07  5:32 ` Qu Wenruo [this message]
2022-07-07  5:32 ` [PATCH 06/12] btrfs: write-intent: introduce an internal helper to set bits for a range Qu Wenruo
2022-07-08  1:55   ` kernel test robot
2022-07-08  2:22     ` Qu Wenruo
2022-07-08  2:22       ` Qu Wenruo
2022-07-08  7:23   ` kernel test robot
2022-07-07  5:32 ` [PATCH 07/12] btrfs: write-intent: introduce an internal helper to clear " Qu Wenruo
2022-07-07  5:32 ` [PATCH 08/12] btrfs: selftests: add selftests for write-intent bitmaps Qu Wenruo
2022-07-07  5:32 ` [PATCH 09/12] btrfs: write back write intent bitmap after barrier_all_devices() Qu Wenruo
2022-07-07  5:32 ` [PATCH 10/12] btrfs: update and writeback the write-intent bitmap for RAID56 write Qu Wenruo
2022-07-07  5:32 ` [PATCH 11/12] btrfs: raid56: clear write-intent bimaps when a full stripe finishes Qu Wenruo
2022-07-07  5:32 ` [PATCH 12/12] btrfs: warn and clear bitmaps if there is dirty bitmap at mount time Qu Wenruo
2022-07-07  5:36 ` [PATCH 00/12] btrfs: introduce write-intent bitmaps for RAID56 Christoph Hellwig
2022-07-07  5:48   ` Qu Wenruo
2022-07-07  9:37     ` Johannes Thumshirn
2022-07-07  9:45       ` Qu Wenruo
2022-07-07 10:42         ` Qu Wenruo
2022-07-07 12:23         ` Johannes Thumshirn
2022-07-07 13:36     ` Christoph Hellwig
2022-07-07 13:48       ` Qu Wenruo
2022-07-13 16:18 ` Lukas Straub
2022-07-13 23:00   ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f311ec8bb10f38f181959bdd926aab4835dbd3c7.1657171615.git.wqu@suse.com \
    --to=wqu@suse.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.