* [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
@ 2022-03-08 5:36 Qu Wenruo
2022-03-09 2:12 ` Anand Jain
2022-03-14 20:03 ` David Sterba
0 siblings, 2 replies; 5+ messages in thread
From: Qu Wenruo @ 2022-03-08 5:36 UTC (permalink / raw)
To: linux-btrfs; +Cc: Luca Béla Palkovics
[BUG]
There is a report that a btrfs has a bad super block num devices.
This makes btrfs to reject the fs completely.
BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
BTRFS error (device sdd3): failed to read chunk tree: -22
BTRFS error (device sdd3): open_ctree failed
[CAUSE]
During btrfs device removal, chunk tree and super block num devs are
updated in two different transactions:
btrfs_rm_device()
|- btrfs_rm_dev_item(device)
| |- trans = btrfs_start_transaction()
| | Now we got transaction X
| |
| |- btrfs_del_item()
| | Now device item is removed from chunk tree
| |
| |- btrfs_commit_transaction()
| Transaction X got committed, super num devs untouched,
| but device item removed from chunk tree.
| (AKA, super num devs is already incorrect)
|
|- cur_devices->num_devices--;
|- cur_devices->total_devices--;
|- btrfs_set_super_num_devices()
All those operations are not in transaction X, thus it will
only be written back to disk in next transaction.
So after the transaction X in btrfs_rm_dev_item() committed, but before
transaction X+1 (which can be minutes away), a power loss happen, then
we got the super num mismatch.
[FIX]
Instead of starting and committing a transaction inside
btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
pass it to btrfs_rm_dev_item().
And only commit the transaction after everything is done.
Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
1 file changed, 28 insertions(+), 37 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 57a754b33f10..6115c302f4ae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
path_put(&path);
}
-static int btrfs_rm_dev_item(struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device)
{
struct btrfs_root *root = device->fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_trans_handle *trans;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
if (ret) {
if (ret > 0)
ret = -ENOENT;
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
goto out;
}
ret = btrfs_del_item(trans, root, path);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- }
-
out:
btrfs_free_path(path);
- if (!ret)
- ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
if (ret)
- goto out;
+ return ret;
device = btrfs_find_device(fs_info->fs_devices, args);
if (!device) {
@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = -ENOENT;
- goto out;
+ return ret;
}
if (btrfs_pinned_by_swapfile(fs_info, device)) {
btrfs_warn_in_rcu(fs_info,
"cannot remove device %s (devid %llu) due to active swapfile",
rcu_str_deref(device->name), device->devid);
- ret = -ETXTBSY;
- goto out;
+ return -ETXTBSY;
}
- if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = BTRFS_ERROR_DEV_TGT_REPLACE;
- goto out;
- }
+ if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
+ return BTRFS_ERROR_DEV_TGT_REPLACE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
- fs_info->fs_devices->rw_devices == 1) {
- ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
- goto out;
- }
+ fs_info->fs_devices->rw_devices == 1)
+ return BTRFS_ERROR_DEV_ONLY_WRITABLE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
if (ret)
goto error_undo;
- /*
- * TODO: the superblock still includes this device in its num_devices
- * counter although write_all_supers() is not locked out. This
- * could give a filesystem state which requires a degraded mount.
- */
- ret = btrfs_rm_dev_item(device);
- if (ret)
+ trans = btrfs_start_transaction(fs_info->chunk_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
goto error_undo;
+ }
+
+ ret = btrfs_rm_dev_item(trans, device);
+ if (ret) {
+ /* Any error in dev item removal is critical */
+ btrfs_crit(fs_info,
+ "failed to remove device item for devid %llu: %d",
+ device->devid, ret);
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
btrfs_scrub_cancel_dev(device);
@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
free_fs_devices(cur_devices);
}
-out:
+ ret = btrfs_commit_transaction(trans);
+
return ret;
error_undo:
@@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
- goto out;
+ return ret;
}
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
--
2.35.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
2022-03-08 5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
@ 2022-03-09 2:12 ` Anand Jain
2022-03-09 2:58 ` Qu Wenruo
2022-03-14 20:03 ` David Sterba
1 sibling, 1 reply; 5+ messages in thread
From: Anand Jain @ 2022-03-09 2:12 UTC (permalink / raw)
To: Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics
On 08/03/2022 13:36, Qu Wenruo wrote:
> [BUG]
> There is a report that a btrfs has a bad super block num devices.
>
> This makes btrfs to reject the fs completely.
>
> BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
> BTRFS error (device sdd3): failed to read chunk tree: -22
> BTRFS error (device sdd3): open_ctree failed
>
> [CAUSE]
> During btrfs device removal, chunk tree and super block num devs are
> updated in two different transactions:
>
> btrfs_rm_device()
> |- btrfs_rm_dev_item(device)
> | |- trans = btrfs_start_transaction()
> | | Now we got transaction X
> | |
> | |- btrfs_del_item()
> | | Now device item is removed from chunk tree
> | |
> | |- btrfs_commit_transaction()
> | Transaction X got committed, super num devs untouched,
> | but device item removed from chunk tree.
> | (AKA, super num devs is already incorrect)
> |
> |- cur_devices->num_devices--;
> |- cur_devices->total_devices--;
> |- btrfs_set_super_num_devices()
> All those operations are not in transaction X, thus it will
> only be written back to disk in next transaction.
>
> So after the transaction X in btrfs_rm_dev_item() committed, but before
> transaction X+1 (which can be minutes away), a power loss happen, then
> we got the super num mismatch.
>
> [FIX]
> Instead of starting and committing a transaction inside
> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
> pass it to btrfs_rm_dev_item().
>
> And only commit the transaction after everything is done.
> > Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
> Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
> fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
> 1 file changed, 28 insertions(+), 37 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 57a754b33f10..6115c302f4ae 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
> path_put(&path);
> }
>
> -static int btrfs_rm_dev_item(struct btrfs_device *device)
> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
> + struct btrfs_device *device)
> {
> struct btrfs_root *root = device->fs_info->chunk_root;
> int ret;
> struct btrfs_path *path;
> struct btrfs_key key;
> - struct btrfs_trans_handle *trans;
>
> path = btrfs_alloc_path();
> if (!path)
> return -ENOMEM;
>
> - trans = btrfs_start_transaction(root, 0);
> - if (IS_ERR(trans)) {
> - btrfs_free_path(path);
> - return PTR_ERR(trans);
> - }
> key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
> key.type = BTRFS_DEV_ITEM_KEY;
> key.offset = device->devid;
> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
> if (ret) {
> if (ret > 0)
> ret = -ENOENT;
> - btrfs_abort_transaction(trans, ret);
> - btrfs_end_transaction(trans);
> goto out;
> }
>
> ret = btrfs_del_item(trans, root, path);
> - if (ret) {
> - btrfs_abort_transaction(trans, ret);
> - btrfs_end_transaction(trans);
> - }
> -
> out:
> btrfs_free_path(path);
> - if (!ret)
> - ret = btrfs_commit_transaction(trans);
> return ret;
> }
>
> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
> struct btrfs_dev_lookup_args *args,
> struct block_device **bdev, fmode_t *mode)
> {
> + struct btrfs_trans_handle *trans;
> struct btrfs_device *device;
> struct btrfs_fs_devices *cur_devices;
> struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>
> ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
> if (ret)
> - goto out;
> + return ret;
>
> device = btrfs_find_device(fs_info->fs_devices, args);
> if (!device) {
> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
> ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
> else
> ret = -ENOENT;
> - goto out;
> + return ret;
> }
>
> if (btrfs_pinned_by_swapfile(fs_info, device)) {
> btrfs_warn_in_rcu(fs_info,
> "cannot remove device %s (devid %llu) due to active swapfile",
> rcu_str_deref(device->name), device->devid);
> - ret = -ETXTBSY;
> - goto out;
> + return -ETXTBSY;
> }
>
> - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
> - ret = BTRFS_ERROR_DEV_TGT_REPLACE;
> - goto out;
> - }
> + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
> + return BTRFS_ERROR_DEV_TGT_REPLACE;
>
> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
> - fs_info->fs_devices->rw_devices == 1) {
> - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
> - goto out;
> - }
> + fs_info->fs_devices->rw_devices == 1)
> + return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>
> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
> mutex_lock(&fs_info->chunk_mutex);
> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
> if (ret)
> goto error_undo;
>
> - /*
> - * TODO: the superblock still includes this device in its num_devices
> - * counter although write_all_supers() is not locked out. This
> - * could give a filesystem state which requires a degraded mount.
> - */
> - ret = btrfs_rm_dev_item(device);
> - if (ret)
> + trans = btrfs_start_transaction(fs_info->chunk_root, 0);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> goto error_undo;
> + }
> +
> + ret = btrfs_rm_dev_item(trans, device);
> + if (ret) {
> + /* Any error in dev item removal is critical */
> + btrfs_crit(fs_info,
> + "failed to remove device item for devid %llu: %d",
> + device->devid, ret);
> + btrfs_abort_transaction(trans, ret);
> + btrfs_end_transaction(trans);
> + return ret;
Missed error_undo part of the undo here.
Thanks, Anand
> + }
>
> clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
> btrfs_scrub_cancel_dev(device);
> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
> free_fs_devices(cur_devices);
> }
>
> -out:
> + ret = btrfs_commit_transaction(trans);
> +
> return ret;
>
> error_undo:
> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
> device->fs_devices->rw_devices++;
> mutex_unlock(&fs_info->chunk_mutex);
> }
> - goto out;
> + return ret;
> }
>
> void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
2022-03-09 2:12 ` Anand Jain
@ 2022-03-09 2:58 ` Qu Wenruo
2022-03-09 7:31 ` Anand Jain
0 siblings, 1 reply; 5+ messages in thread
From: Qu Wenruo @ 2022-03-09 2:58 UTC (permalink / raw)
To: Anand Jain, Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics
On 2022/3/9 10:12, Anand Jain wrote:
> On 08/03/2022 13:36, Qu Wenruo wrote:
>> [BUG]
>> There is a report that a btrfs has a bad super block num devices.
>>
>> This makes btrfs to reject the fs completely.
>>
>> BTRFS error (device sdd3): super_num_devices 3 mismatch with
>> num_devices 2 found here
>> BTRFS error (device sdd3): failed to read chunk tree: -22
>> BTRFS error (device sdd3): open_ctree failed
>>
>> [CAUSE]
>> During btrfs device removal, chunk tree and super block num devs are
>> updated in two different transactions:
>>
>> btrfs_rm_device()
>> |- btrfs_rm_dev_item(device)
>> | |- trans = btrfs_start_transaction()
>> | | Now we got transaction X
>> | |
>> | |- btrfs_del_item()
>> | | Now device item is removed from chunk tree
>> | |
>> | |- btrfs_commit_transaction()
>> | Transaction X got committed, super num devs untouched,
>> | but device item removed from chunk tree.
>> | (AKA, super num devs is already incorrect)
>> |
>> |- cur_devices->num_devices--;
>> |- cur_devices->total_devices--;
>> |- btrfs_set_super_num_devices()
>> All those operations are not in transaction X, thus it will
>> only be written back to disk in next transaction.
>>
>> So after the transaction X in btrfs_rm_dev_item() committed, but before
>> transaction X+1 (which can be minutes away), a power loss happen, then
>> we got the super num mismatch.
>>
>> [FIX]
>> Instead of starting and committing a transaction inside
>> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
>> pass it to btrfs_rm_dev_item().
>>
>> And only commit the transaction after everything is done.
>> > Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
>> Link:
>> https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>> fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
>> 1 file changed, 28 insertions(+), 37 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 57a754b33f10..6115c302f4ae 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char
>> *device_path)
>> path_put(&path);
>> }
>> -static int btrfs_rm_dev_item(struct btrfs_device *device)
>> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
>> + struct btrfs_device *device)
>> {
>> struct btrfs_root *root = device->fs_info->chunk_root;
>> int ret;
>> struct btrfs_path *path;
>> struct btrfs_key key;
>> - struct btrfs_trans_handle *trans;
>> path = btrfs_alloc_path();
>> if (!path)
>> return -ENOMEM;
>> - trans = btrfs_start_transaction(root, 0);
>> - if (IS_ERR(trans)) {
>> - btrfs_free_path(path);
>> - return PTR_ERR(trans);
>> - }
>> key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>> key.type = BTRFS_DEV_ITEM_KEY;
>> key.offset = device->devid;
>> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct
>> btrfs_device *device)
>> if (ret) {
>> if (ret > 0)
>> ret = -ENOENT;
>> - btrfs_abort_transaction(trans, ret);
>> - btrfs_end_transaction(trans);
>> goto out;
>> }
>> ret = btrfs_del_item(trans, root, path);
>> - if (ret) {
>> - btrfs_abort_transaction(trans, ret);
>> - btrfs_end_transaction(trans);
>> - }
>> -
>> out:
>> btrfs_free_path(path);
>> - if (!ret)
>> - ret = btrfs_commit_transaction(trans);
>> return ret;
>> }
>> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>> struct btrfs_dev_lookup_args *args,
>> struct block_device **bdev, fmode_t *mode)
>> {
>> + struct btrfs_trans_handle *trans;
>> struct btrfs_device *device;
>> struct btrfs_fs_devices *cur_devices;
>> struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>> ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>> if (ret)
>> - goto out;
>> + return ret;
>> device = btrfs_find_device(fs_info->fs_devices, args);
>> if (!device) {
>> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info
>> *fs_info,
>> ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
>> else
>> ret = -ENOENT;
>> - goto out;
>> + return ret;
>> }
>> if (btrfs_pinned_by_swapfile(fs_info, device)) {
>> btrfs_warn_in_rcu(fs_info,
>> "cannot remove device %s (devid %llu) due to active
>> swapfile",
>> rcu_str_deref(device->name), device->devid);
>> - ret = -ETXTBSY;
>> - goto out;
>> + return -ETXTBSY;
>> }
>> - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
>> - ret = BTRFS_ERROR_DEV_TGT_REPLACE;
>> - goto out;
>> - }
>> + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
>> + return BTRFS_ERROR_DEV_TGT_REPLACE;
>> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>> - fs_info->fs_devices->rw_devices == 1) {
>> - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
>> - goto out;
>> - }
>> + fs_info->fs_devices->rw_devices == 1)
>> + return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
>> mutex_lock(&fs_info->chunk_mutex);
>> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info
>> *fs_info,
>> if (ret)
>> goto error_undo;
>> - /*
>> - * TODO: the superblock still includes this device in its
>> num_devices
>> - * counter although write_all_supers() is not locked out. This
>> - * could give a filesystem state which requires a degraded mount.
>> - */
>> - ret = btrfs_rm_dev_item(device);
>> - if (ret)
>> + trans = btrfs_start_transaction(fs_info->chunk_root, 0);
>> + if (IS_ERR(trans)) {
>> + ret = PTR_ERR(trans);
>> goto error_undo;
>> + }
>> +
>> + ret = btrfs_rm_dev_item(trans, device);
>> + if (ret) {
>> + /* Any error in dev item removal is critical */
>> + btrfs_crit(fs_info,
>> + "failed to remove device item for devid %llu: %d",
>> + device->devid, ret);
>> + btrfs_abort_transaction(trans, ret);
>> + btrfs_end_transaction(trans);
>> + return ret;
>
> Missed error_undo part of the undo here.
Nope, that's exactly expected.
We abort transaction, thus nothing committed, no need to undo.
In fact, after the btrfs_rm_dev_item() call, there is no real way to
rollback the delete.
Thanks,
Qu
>
> Thanks, Anand
>
>> + }
>> clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>> btrfs_scrub_cancel_dev(device);
>> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>> free_fs_devices(cur_devices);
>> }
>> -out:
>> + ret = btrfs_commit_transaction(trans);
>> +
>> return ret;
>> error_undo:
>> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>> device->fs_devices->rw_devices++;
>> mutex_unlock(&fs_info->chunk_mutex);
>> }
>> - goto out;
>> + return ret;
>> }
>> void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
2022-03-09 2:58 ` Qu Wenruo
@ 2022-03-09 7:31 ` Anand Jain
0 siblings, 0 replies; 5+ messages in thread
From: Anand Jain @ 2022-03-09 7:31 UTC (permalink / raw)
To: Qu Wenruo, Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics
On 09/03/2022 10:58, Qu Wenruo wrote:
>
>
> On 2022/3/9 10:12, Anand Jain wrote:
>> On 08/03/2022 13:36, Qu Wenruo wrote:
>>> [BUG]
>>> There is a report that a btrfs has a bad super block num devices.
>>>
>>> This makes btrfs to reject the fs completely.
>>>
>>> BTRFS error (device sdd3): super_num_devices 3 mismatch with
>>> num_devices 2 found here
>>> BTRFS error (device sdd3): failed to read chunk tree: -22
>>> BTRFS error (device sdd3): open_ctree failed
>>>
>>> [CAUSE]
>>> During btrfs device removal, chunk tree and super block num devs are
>>> updated in two different transactions:
>>>
>>> btrfs_rm_device()
>>> |- btrfs_rm_dev_item(device)
>>> | |- trans = btrfs_start_transaction()
>>> | | Now we got transaction X
>>> | |
>>> | |- btrfs_del_item()
>>> | | Now device item is removed from chunk tree
>>> | |
>>> | |- btrfs_commit_transaction()
>>> | Transaction X got committed, super num devs untouched,
>>> | but device item removed from chunk tree.
>>> | (AKA, super num devs is already incorrect)
>>> |
>>> |- cur_devices->num_devices--;
>>> |- cur_devices->total_devices--;
>>> |- btrfs_set_super_num_devices()
>>> All those operations are not in transaction X, thus it will
>>> only be written back to disk in next transaction.
>>>
>>> So after the transaction X in btrfs_rm_dev_item() committed, but before
>>> transaction X+1 (which can be minutes away), a power loss happen, then
>>> we got the super num mismatch.
>>>
>>> [FIX]
>>> Instead of starting and committing a transaction inside
>>> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
>>> pass it to btrfs_rm_dev_item().
>>>
>>> And only commit the transaction after everything is done.
>>> > Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
>>> Link:
>>> https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
>>>
>>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>>> ---
>>> fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
>>> 1 file changed, 28 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index 57a754b33f10..6115c302f4ae 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char
>>> *device_path)
>>> path_put(&path);
>>> }
>>> -static int btrfs_rm_dev_item(struct btrfs_device *device)
>>> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
>>> + struct btrfs_device *device)
>>> {
>>> struct btrfs_root *root = device->fs_info->chunk_root;
>>> int ret;
>>> struct btrfs_path *path;
>>> struct btrfs_key key;
>>> - struct btrfs_trans_handle *trans;
>>> path = btrfs_alloc_path();
>>> if (!path)
>>> return -ENOMEM;
>>> - trans = btrfs_start_transaction(root, 0);
>>> - if (IS_ERR(trans)) {
>>> - btrfs_free_path(path);
>>> - return PTR_ERR(trans);
>>> - }
>>> key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>>> key.type = BTRFS_DEV_ITEM_KEY;
>>> key.offset = device->devid;
>>> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct
>>> btrfs_device *device)
>>> if (ret) {
>>> if (ret > 0)
>>> ret = -ENOENT;
>>> - btrfs_abort_transaction(trans, ret);
>>> - btrfs_end_transaction(trans);
>>> goto out;
>>> }
>>> ret = btrfs_del_item(trans, root, path);
>>> - if (ret) {
>>> - btrfs_abort_transaction(trans, ret);
>>> - btrfs_end_transaction(trans);
>>> - }
>>> -
>>> out:
>>> btrfs_free_path(path);
>>> - if (!ret)
>>> - ret = btrfs_commit_transaction(trans);
>>> return ret;
>>> }
>>> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>> struct btrfs_dev_lookup_args *args,
>>> struct block_device **bdev, fmode_t *mode)
>>> {
>>> + struct btrfs_trans_handle *trans;
>>> struct btrfs_device *device;
>>> struct btrfs_fs_devices *cur_devices;
>>> struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>>> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>> ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>>> if (ret)
>>> - goto out;
>>> + return ret;
>>> device = btrfs_find_device(fs_info->fs_devices, args);
>>> if (!device) {
>>> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info
>>> *fs_info,
>>> ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
>>> else
>>> ret = -ENOENT;
>>> - goto out;
>>> + return ret;
>>> }
>>> if (btrfs_pinned_by_swapfile(fs_info, device)) {
>>> btrfs_warn_in_rcu(fs_info,
>>> "cannot remove device %s (devid %llu) due to active
>>> swapfile",
>>> rcu_str_deref(device->name), device->devid);
>>> - ret = -ETXTBSY;
>>> - goto out;
>>> + return -ETXTBSY;
>>> }
>>> - if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
>>> - ret = BTRFS_ERROR_DEV_TGT_REPLACE;
>>> - goto out;
>>> - }
>>> + if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
>>> + return BTRFS_ERROR_DEV_TGT_REPLACE;
>>> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>>> - fs_info->fs_devices->rw_devices == 1) {
>>> - ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
>>> - goto out;
>>> - }
>>> + fs_info->fs_devices->rw_devices == 1)
>>> + return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>>> if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
>>> mutex_lock(&fs_info->chunk_mutex);
>>> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info
>>> *fs_info,
>>> if (ret)
>>> goto error_undo;
>>> - /*
>>> - * TODO: the superblock still includes this device in its
>>> num_devices
>>> - * counter although write_all_supers() is not locked out. This
>>> - * could give a filesystem state which requires a degraded mount.
>>> - */
>>> - ret = btrfs_rm_dev_item(device);
>>> - if (ret)
>>> + trans = btrfs_start_transaction(fs_info->chunk_root, 0);
>>> + if (IS_ERR(trans)) {
>>> + ret = PTR_ERR(trans);
>>> goto error_undo;
>>> + }
>>> +
>>> + ret = btrfs_rm_dev_item(trans, device);
>>> + if (ret) {
>>> + /* Any error in dev item removal is critical */
>>> + btrfs_crit(fs_info,
>>> + "failed to remove device item for devid %llu: %d",
>>> + device->devid, ret);
>>> + btrfs_abort_transaction(trans, ret);
>>> + btrfs_end_transaction(trans);
>>> + return ret;
>>
>> Missed error_undo part of the undo here.
>
> Nope, that's exactly expected.
>
> We abort transaction, thus nothing committed, no need to undo.
My concern is device->fs_devices->rw_devices
is not equal to
device->fs_devices->num_devices
and fs is ro at this stage.
I am a bit nervous if our close devices would be ok.
But it looks ok.
Anyway, after the unmount and mount recycle the
rw_devices == num_devices again. But a device shall have zero
disk_total_bytes. Which is fine. The user can try rm device again.
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Thanks, Anand
> In fact, after the btrfs_rm_dev_item() call, there is no real way to
> rollback the delete.
> Thanks,
> Qu
>>
>> Thanks, Anand
>>
>>> + }
>>> clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>>> btrfs_scrub_cancel_dev(device);
>>> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>> free_fs_devices(cur_devices);
>>> }
>>> -out:
>>> + ret = btrfs_commit_transaction(trans);
>>> +
>>> return ret;
>>> error_undo:
>>> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>> device->fs_devices->rw_devices++;
>>> mutex_unlock(&fs_info->chunk_mutex);
>>> }
>>> - goto out;
>>> + return ret;
>>> }
>>> void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
>>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
2022-03-08 5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
2022-03-09 2:12 ` Anand Jain
@ 2022-03-14 20:03 ` David Sterba
1 sibling, 0 replies; 5+ messages in thread
From: David Sterba @ 2022-03-14 20:03 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs, Luca Béla Palkovics
On Tue, Mar 08, 2022 at 01:36:38PM +0800, Qu Wenruo wrote:
> [BUG]
> There is a report that a btrfs has a bad super block num devices.
People have reported this on IRC in the past too. In some cases it was a
report with two devices expected but one found and "I have never added
nor removed a device on this filesystem", so it was a bit mysterious.
The split update over the transaction is otherwise a clear cause.
Added to misc-next, thanks.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-03-14 20:07 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-08 5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
2022-03-09 2:12 ` Anand Jain
2022-03-09 2:58 ` Qu Wenruo
2022-03-09 7:31 ` Anand Jain
2022-03-14 20:03 ` David Sterba
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.