All of lore.kernel.org
 help / color / mirror / Atom feed
From: Josef Bacik <josef@toxicpanda.com>
To: linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: [PATCH][RFC] btrfs: introduce rescue=onlyfs
Date: Wed,  1 Jul 2020 10:44:38 -0400	[thread overview]
Message-ID: <20200701144438.7613-1-josef@toxicpanda.com> (raw)

One of the things that came up consistently in talking with Fedora about
switching to btrfs as default is that btrfs is particularly vulnerable
to metadata corruption.  If any of the core global roots are corrupted,
the fs is unmountable and fsck can't usually do anything for you without
some special options.

Qu addressed this sort of with rescue=skipbg, but that's poorly named as
what it really does is just allow you to operate without an extent root.
However there are a lot of other roots, and I'd rather not have to do

mount -o rescue=skipbg,rescue=nocsum,rescue=nofreespacetree,rescue=blah

Instead take his original idea and modify it so it just works for
everything.  Turn it into rescue=onlyfs, and then any major root we fail
to read just gets left empty and we carry on.

Obviously if the fs roots are screwed then the user is in trouble, but
otherwise this makes it much easier to pull stuff off the disk without
needing our special rescue tools.  I tested this with my TEST_DEV that
had a bunch of data on it by corrupting the csum tree and then reading
files off the disk.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---

I'm not married to the rescue=onlyfs name, if we can think of something better
I'm good.

Also rescue=skipbg is currently only sitting in misc-next, which is why I'm
killing it with this patch, we haven't sent it upstream so we're good to change
it now before it lands.

 fs/btrfs/block-group.c |  2 +-
 fs/btrfs/ctree.h       |  2 +-
 fs/btrfs/disk-io.c     | 76 ++++++++++++++++++++++--------------------
 fs/btrfs/inode.c       |  6 +++-
 fs/btrfs/super.c       | 27 +++++++--------
 fs/btrfs/volumes.c     |  4 +--
 6 files changed, 63 insertions(+), 54 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 09b796a081dd..cb5608b2deec 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2052,7 +2052,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 	int need_clear = 0;
 	u64 cache_gen;
 
-	if (btrfs_test_opt(info, SKIPBG))
+	if (btrfs_test_opt(info, ONLYFS))
 		return fill_dummy_bgs(info);
 
 	key.objectid = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e40eb210670d..d888d08f7e0d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1265,7 +1265,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
 #define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
 #define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29)
-#define BTRFS_MOUNT_SKIPBG		(1 << 30)
+#define BTRFS_MOUNT_ONLYFS		(1 << 30)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c27022f13150..9cba0a66b3d5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2262,11 +2262,10 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 
 	root = btrfs_read_tree_root(tree_root, &location);
 	if (IS_ERR(root)) {
-		if (!btrfs_test_opt(fs_info, SKIPBG)) {
+		if (!btrfs_test_opt(fs_info, ONLYFS)) {
 			ret = PTR_ERR(root);
 			goto out;
 		}
-		fs_info->extent_root = NULL;
 	} else {
 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 		fs_info->extent_root = root;
@@ -2275,21 +2274,27 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	location.objectid = BTRFS_DEV_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
 	if (IS_ERR(root)) {
-		ret = PTR_ERR(root);
-		goto out;
+		if (!btrfs_test_opt(fs_info, ONLYFS)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
+	} else {
+		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+		fs_info->dev_root = root;
+		btrfs_init_devices_late(fs_info);
 	}
-	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-	fs_info->dev_root = root;
-	btrfs_init_devices_late(fs_info);
 
 	location.objectid = BTRFS_CSUM_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
 	if (IS_ERR(root)) {
-		ret = PTR_ERR(root);
-		goto out;
+		if (!btrfs_test_opt(fs_info, ONLYFS)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
+	} else {
+		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+		fs_info->csum_root = root;
 	}
-	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-	fs_info->csum_root = root;
 
 	/*
 	 * This tree can share blocks with some other fs tree during relocation
@@ -2298,11 +2303,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	root = btrfs_get_fs_root(tree_root->fs_info,
 				 BTRFS_DATA_RELOC_TREE_OBJECTID, true);
 	if (IS_ERR(root)) {
-		ret = PTR_ERR(root);
-		goto out;
+		if (!btrfs_test_opt(fs_info, ONLYFS)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
+	} else {
+		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+		fs_info->data_reloc_root = root;
 	}
-	set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-	fs_info->data_reloc_root = root;
 
 	location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
@@ -2315,9 +2323,11 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	location.objectid = BTRFS_UUID_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
 	if (IS_ERR(root)) {
-		ret = PTR_ERR(root);
-		if (ret != -ENOENT)
-			goto out;
+		if (!btrfs_test_opt(fs_info, ONLYFS)) {
+			ret = PTR_ERR(root);
+			if (ret != -ENOENT)
+				goto out;
+		}
 	} else {
 		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
 		fs_info->uuid_root = root;
@@ -2327,11 +2337,14 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 		location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
 		root = btrfs_read_tree_root(tree_root, &location);
 		if (IS_ERR(root)) {
-			ret = PTR_ERR(root);
-			goto out;
+			if (!btrfs_test_opt(fs_info, ONLYFS)) {
+				ret = PTR_ERR(root);
+				goto out;
+			}
+		}  else {
+			set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+			fs_info->free_space_root = root;
 		}
-		set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-		fs_info->free_space_root = root;
 	}
 
 	return 0;
@@ -3047,20 +3060,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	}
 
 	/* Skip bg needs RO and no tree-log to replay */
-	if (btrfs_test_opt(fs_info, SKIPBG)) {
-		if (!sb_rdonly(sb)) {
-			btrfs_err(fs_info,
-			"rescue=skipbg can only be used on read-only mount");
-			err = -EINVAL;
-			goto fail_alloc;
-		}
-		if (btrfs_super_log_root(disk_super) &&
-		    !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
-			btrfs_err(fs_info,
-"rescue=skipbg must be used with rescue=nologreplay when tree-log needs to replayed");
-			err = -EINVAL;
-			goto fail_alloc;
-		}
+	if (btrfs_test_opt(fs_info, ONLYFS) && !sb_rdonly(sb)) {
+		btrfs_err(fs_info,
+			  "rescue=onlyfs can only be used on read-only mount");
+		err = -EINVAL;
+		goto fail_alloc;
 	}
 
 	ret = btrfs_init_workqueues(fs_info, fs_devices);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d301550b9c70..9f8ef22ac65e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2209,7 +2209,8 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
 	int skip_sum;
 	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
 
-	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
+		btrfs_test_opt(fs_info, ONLYFS);
 
 	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
 		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
@@ -2866,6 +2867,9 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 		return 0;
 
+	if (btrfs_test_opt(root->fs_info, ONLYFS))
+		return 0;
+
 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
 	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
 		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3c9ebd4f2b61..7ea9f8f53156 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -345,7 +345,7 @@ enum {
 	Opt_rescue,
 	Opt_usebackuproot,
 	Opt_nologreplay,
-	Opt_rescue_skipbg,
+	Opt_rescue_onlyfs,
 
 	/* Deprecated options */
 	Opt_alloc_start,
@@ -445,7 +445,7 @@ static const match_table_t tokens = {
 static const match_table_t rescue_tokens = {
 	{Opt_usebackuproot, "usebackuproot"},
 	{Opt_nologreplay, "nologreplay"},
-	{Opt_rescue_skipbg, "skipbg"},
+	{Opt_rescue_onlyfs, "onlyfs"},
 	{Opt_err, NULL},
 };
 
@@ -478,9 +478,10 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
 			btrfs_set_and_info(info, NOLOGREPLAY,
 					   "disabling log replay at mount time");
 			break;
-		case Opt_rescue_skipbg:
-			btrfs_set_and_info(info, SKIPBG,
-				"skip mount time block group searching");
+		case Opt_rescue_onlyfs:
+			btrfs_set_and_info(info, ONLYFS,
+					   "only reading fs roots, also setting  nologreplay");
+			btrfs_set_opt(info->mount_opt, NOLOGREPLAY);
 			break;
 		case Opt_err:
 			btrfs_info(info, "unrecognized rescue option '%s'", p);
@@ -1418,8 +1419,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_puts(seq, ",notreelog");
 	if (btrfs_test_opt(info, NOLOGREPLAY))
 		seq_puts(seq, ",rescue=nologreplay");
-	if (btrfs_test_opt(info, SKIPBG))
-		seq_puts(seq, ",rescue=skipbg");
+	if (btrfs_test_opt(info, ONLYFS))
+		seq_puts(seq, ",rescue=onlyfs");
 	if (btrfs_test_opt(info, FLUSHONCOMMIT))
 		seq_puts(seq, ",flushoncommit");
 	if (btrfs_test_opt(info, DISCARD_SYNC))
@@ -1859,10 +1860,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 	if (ret)
 		goto restore;
 
-	if (btrfs_test_opt(fs_info, SKIPBG) !=
-	    (old_opts & BTRFS_MOUNT_SKIPBG)) {
+	if (btrfs_test_opt(fs_info, ONLYFS) !=
+	    (old_opts & BTRFS_MOUNT_ONLYFS)) {
 		btrfs_err(fs_info,
-		"rescue=skipbg mount option can't be changed during remount");
+		"rescue=onlyfs mount option can't be changed during remount");
 		ret = -EINVAL;
 		goto restore;
 	}
@@ -1932,9 +1933,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			goto restore;
 		}
 
-		if (btrfs_test_opt(fs_info, SKIPBG)) {
+		if (btrfs_test_opt(fs_info, ONLYFS)) {
 			btrfs_err(fs_info,
-		"remounting read-write with rescue=skipbg is not allowed");
+		"remounting read-write with rescue=onlyfs is not allowed");
 			ret = -EINVAL;
 			goto restore;
 		}
@@ -2245,7 +2246,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 *
 	 * Or if we're rescuing, set available to 0 anyway.
 	 */
-	if (btrfs_test_opt(fs_info, SKIPBG) ||
+	if (btrfs_test_opt(fs_info, ONLYFS) ||
 	    (!mixed && block_rsv->space_info->full &&
 	     total_free_meta - thresh < block_rsv->size))
 		buf->f_bavail = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index aabc6c922e04..a5d124f95ce2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7595,10 +7595,10 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
 	int ret = 0;
 
 	/*
-	 * For rescue=skipbg mount option, we're already RO and are salvaging
+	 * For rescue=onlyfs mount option, we're already RO and are salvaging
 	 * data, no need for such strict check.
 	 */
-	if (btrfs_test_opt(fs_info, SKIPBG))
+	if (btrfs_test_opt(fs_info, ONLYFS))
 		return 0;
 
 	key.objectid = 1;
-- 
2.24.1


             reply	other threads:[~2020-07-01 14:44 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-01 14:44 Josef Bacik [this message]
2020-07-01 15:22 ` [PATCH][RFC] btrfs: introduce rescue=onlyfs Lukas Straub
2020-07-01 15:39   ` David Sterba
2020-07-01 15:51     ` Josef Bacik
2020-07-01 19:16     ` Alberto Bursi
2020-07-03  9:16       ` David Sterba
2020-07-02  2:05     ` Zygo Blaxell
2020-07-01 19:43 ` waxhead
2020-07-01 19:45   ` waxhead
2020-07-01 19:53   ` Josef Bacik
2020-07-01 21:17     ` Hans van Kranenburg
2020-07-02  3:09     ` Qu Wenruo
2020-07-02 15:28       ` Josef Bacik
2020-07-02 23:36         ` Qu Wenruo
2020-07-03  0:09           ` Neal Gompa
2020-07-02  3:30     ` Zygo Blaxell
2020-07-02 15:35       ` Josef Bacik
2020-07-02 21:10         ` Zygo Blaxell
2020-07-03  9:43       ` David Sterba
2020-07-03  9:47     ` David Sterba
2020-07-02  0:30 ` Qu Wenruo
2020-07-07 15:16 ` David Sterba
2020-07-07 16:25   ` waxhead

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200701144438.7613-1-josef@toxicpanda.com \
    --to=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.