All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
@ 2022-03-08  5:36 Qu Wenruo
  2022-03-09  2:12 ` Anand Jain
  2022-03-14 20:03 ` David Sterba
  0 siblings, 2 replies; 5+ messages in thread
From: Qu Wenruo @ 2022-03-08  5:36 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Luca Béla Palkovics

[BUG]
There is a report that a btrfs filesystem has a bad super block
num_devices value.

This makes btrfs reject the fs completely.

  BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
  BTRFS error (device sdd3): failed to read chunk tree: -22
  BTRFS error (device sdd3): open_ctree failed

[CAUSE]
During btrfs device removal, chunk tree and super block num devs are
updated in two different transactions:

  btrfs_rm_device()
  |- btrfs_rm_dev_item(device)
  |  |- trans = btrfs_start_transaction()
  |  |  Now we got transaction X
  |  |
  |  |- btrfs_del_item()
  |  |  Now device item is removed from chunk tree
  |  |
  |  |- btrfs_commit_transaction()
  |     Transaction X got committed, super num devs untouched,
  |     but device item removed from chunk tree.
  |     (AKA, super num devs is already incorrect)
  |
  |- cur_devices->num_devices--;
  |- cur_devices->total_devices--;
  |- btrfs_set_super_num_devices()
     None of those operations are in transaction X, thus they will
     only be written back to disk in the next transaction.

So if a power loss happens after transaction X in btrfs_rm_dev_item() is
committed, but before transaction X+1 (which can be minutes away) is
committed, we get the super num devices mismatch.

[FIX]
Instead of starting and committing a transaction inside
btrfs_rm_dev_item(), start a transaction inside btrfs_rm_device() and
pass it to btrfs_rm_dev_item().

And only commit the transaction after everything is done.

Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 57a754b33f10..6115c302f4ae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
 	path_put(&path);
 }
 
-static int btrfs_rm_dev_item(struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_device *device)
 {
 	struct btrfs_root *root = device->fs_info->chunk_root;
 	int ret;
 	struct btrfs_path *path;
 	struct btrfs_key key;
-	struct btrfs_trans_handle *trans;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-	trans = btrfs_start_transaction(root, 0);
-	if (IS_ERR(trans)) {
-		btrfs_free_path(path);
-		return PTR_ERR(trans);
-	}
 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
 	key.type = BTRFS_DEV_ITEM_KEY;
 	key.offset = device->devid;
@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
 	if (ret) {
 		if (ret > 0)
 			ret = -ENOENT;
-		btrfs_abort_transaction(trans, ret);
-		btrfs_end_transaction(trans);
 		goto out;
 	}
 
 	ret = btrfs_del_item(trans, root, path);
-	if (ret) {
-		btrfs_abort_transaction(trans, ret);
-		btrfs_end_transaction(trans);
-	}
-
 out:
 	btrfs_free_path(path);
-	if (!ret)
-		ret = btrfs_commit_transaction(trans);
 	return ret;
 }
 
@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 		    struct btrfs_dev_lookup_args *args,
 		    struct block_device **bdev, fmode_t *mode)
 {
+	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *cur_devices;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 
 	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
 	if (ret)
-		goto out;
+		return ret;
 
 	device = btrfs_find_device(fs_info->fs_devices, args);
 	if (!device) {
@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
 		else
 			ret = -ENOENT;
-		goto out;
+		return ret;
 	}
 
 	if (btrfs_pinned_by_swapfile(fs_info, device)) {
 		btrfs_warn_in_rcu(fs_info,
 		  "cannot remove device %s (devid %llu) due to active swapfile",
 				  rcu_str_deref(device->name), device->devid);
-		ret = -ETXTBSY;
-		goto out;
+		return -ETXTBSY;
 	}
 
-	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
-		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
-		goto out;
-	}
+	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
+		return BTRFS_ERROR_DEV_TGT_REPLACE;
 
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
-	    fs_info->fs_devices->rw_devices == 1) {
-		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
-		goto out;
-	}
+	    fs_info->fs_devices->rw_devices == 1)
+		return BTRFS_ERROR_DEV_ONLY_WRITABLE;
 
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 		mutex_lock(&fs_info->chunk_mutex);
@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 	if (ret)
 		goto error_undo;
 
-	/*
-	 * TODO: the superblock still includes this device in its num_devices
-	 * counter although write_all_supers() is not locked out. This
-	 * could give a filesystem state which requires a degraded mount.
-	 */
-	ret = btrfs_rm_dev_item(device);
-	if (ret)
+	trans = btrfs_start_transaction(fs_info->chunk_root, 0);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
 		goto error_undo;
+	}
+
+	ret = btrfs_rm_dev_item(trans, device);
+	if (ret) {
+		/* Any error in dev item removal is critical */
+		btrfs_crit(fs_info,
+			   "failed to remove device item for devid %llu: %d",
+			   device->devid, ret);
+		btrfs_abort_transaction(trans, ret);
+		btrfs_end_transaction(trans);
+		return ret;
+	}
 
 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	btrfs_scrub_cancel_dev(device);
@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 		free_fs_devices(cur_devices);
 	}
 
-out:
+	ret = btrfs_commit_transaction(trans);
+
 	return ret;
 
 error_undo:
@@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 		device->fs_devices->rw_devices++;
 		mutex_unlock(&fs_info->chunk_mutex);
 	}
-	goto out;
+	return ret;
 }
 
 void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
  2022-03-08  5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
@ 2022-03-09  2:12 ` Anand Jain
  2022-03-09  2:58   ` Qu Wenruo
  2022-03-14 20:03 ` David Sterba
  1 sibling, 1 reply; 5+ messages in thread
From: Anand Jain @ 2022-03-09  2:12 UTC (permalink / raw)
  To: Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics

On 08/03/2022 13:36, Qu Wenruo wrote:
> [BUG]
> There is a report that a btrfs has a bad super block num devices.
> 
> This makes btrfs to reject the fs completely.
> 
>    BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
>    BTRFS error (device sdd3): failed to read chunk tree: -22
>    BTRFS error (device sdd3): open_ctree failed
> 
> [CAUSE]
> During btrfs device removal, chunk tree and super block num devs are
> updated in two different transactions:
> 
>    btrfs_rm_device()
>    |- btrfs_rm_dev_item(device)
>    |  |- trans = btrfs_start_transaction()
>    |  |  Now we got transaction X
>    |  |
>    |  |- btrfs_del_item()
>    |  |  Now device item is removed from chunk tree
>    |  |
>    |  |- btrfs_commit_transaction()
>    |     Transaction X got committed, super num devs untouched,
>    |     but device item removed from chunk tree.
>    |     (AKA, super num devs is already incorrect)
>    |
>    |- cur_devices->num_devices--;
>    |- cur_devices->total_devices--;
>    |- btrfs_set_super_num_devices()
>       All those operations are not in transaction X, thus it will
>       only be written back to disk in next transaction.
> 
> So after the transaction X in btrfs_rm_dev_item() committed, but before
> transaction X+1 (which can be minutes away), a power loss happen, then
> we got the super num mismatch.
> 
> [FIX]
> Instead of starting and committing a transaction inside
> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
> pass it to btrfs_rm_dev_item().
> 
> And only commit the transaction after everything is done.
>
> Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
> Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
>   1 file changed, 28 insertions(+), 37 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 57a754b33f10..6115c302f4ae 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
>   	path_put(&path);
>   }
>   
> -static int btrfs_rm_dev_item(struct btrfs_device *device)
> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
> +			     struct btrfs_device *device)
>   {
>   	struct btrfs_root *root = device->fs_info->chunk_root;
>   	int ret;
>   	struct btrfs_path *path;
>   	struct btrfs_key key;
> -	struct btrfs_trans_handle *trans;
>   
>   	path = btrfs_alloc_path();
>   	if (!path)
>   		return -ENOMEM;
>   
> -	trans = btrfs_start_transaction(root, 0);
> -	if (IS_ERR(trans)) {
> -		btrfs_free_path(path);
> -		return PTR_ERR(trans);
> -	}
>   	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>   	key.type = BTRFS_DEV_ITEM_KEY;
>   	key.offset = device->devid;
> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
>   	if (ret) {
>   		if (ret > 0)
>   			ret = -ENOENT;
> -		btrfs_abort_transaction(trans, ret);
> -		btrfs_end_transaction(trans);
>   		goto out;
>   	}
>   
>   	ret = btrfs_del_item(trans, root, path);
> -	if (ret) {
> -		btrfs_abort_transaction(trans, ret);
> -		btrfs_end_transaction(trans);
> -	}
> -
>   out:
>   	btrfs_free_path(path);
> -	if (!ret)
> -		ret = btrfs_commit_transaction(trans);
>   	return ret;
>   }
>   
> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   		    struct btrfs_dev_lookup_args *args,
>   		    struct block_device **bdev, fmode_t *mode)
>   {
> +	struct btrfs_trans_handle *trans;
>   	struct btrfs_device *device;
>   	struct btrfs_fs_devices *cur_devices;
>   	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   
>   	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>   	if (ret)
> -		goto out;
> +		return ret;
>   
>   	device = btrfs_find_device(fs_info->fs_devices, args);
>   	if (!device) {
> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
>   		else
>   			ret = -ENOENT;
> -		goto out;
> +		return ret;
>   	}
>   
>   	if (btrfs_pinned_by_swapfile(fs_info, device)) {
>   		btrfs_warn_in_rcu(fs_info,
>   		  "cannot remove device %s (devid %llu) due to active swapfile",
>   				  rcu_str_deref(device->name), device->devid);
> -		ret = -ETXTBSY;
> -		goto out;
> +		return -ETXTBSY;
>   	}
>   
> -	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
> -		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
> -		goto out;
> -	}
> +	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
> +		return BTRFS_ERROR_DEV_TGT_REPLACE;
>   
>   	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
> -	    fs_info->fs_devices->rw_devices == 1) {
> -		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
> -		goto out;
> -	}
> +	    fs_info->fs_devices->rw_devices == 1)
> +		return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>   
>   	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
>   		mutex_lock(&fs_info->chunk_mutex);
> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   	if (ret)
>   		goto error_undo;
>   
> -	/*
> -	 * TODO: the superblock still includes this device in its num_devices
> -	 * counter although write_all_supers() is not locked out. This
> -	 * could give a filesystem state which requires a degraded mount.
> -	 */
> -	ret = btrfs_rm_dev_item(device);
> -	if (ret)
> +	trans = btrfs_start_transaction(fs_info->chunk_root, 0);
> +	if (IS_ERR(trans)) {
> +		ret = PTR_ERR(trans);
>   		goto error_undo;
> +	}
> +
> +	ret = btrfs_rm_dev_item(trans, device);
> +	if (ret) {
> +		/* Any error in dev item removal is critical */
> +		btrfs_crit(fs_info,
> +			   "failed to remove device item for devid %llu: %d",
> +			   device->devid, ret);
> +		btrfs_abort_transaction(trans, ret);
> +		btrfs_end_transaction(trans);
> +		return ret;

  This error path skips the error_undo cleanup.

Thanks, Anand

> +	}
>   
>   	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>   	btrfs_scrub_cancel_dev(device);
> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   		free_fs_devices(cur_devices);
>   	}
>   
> -out:
> +	ret = btrfs_commit_transaction(trans);
> +
>   	return ret;
>   
>   error_undo:
> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>   		device->fs_devices->rw_devices++;
>   		mutex_unlock(&fs_info->chunk_mutex);
>   	}
> -	goto out;
> +	return ret;
>   }
>   
>   void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
  2022-03-09  2:12 ` Anand Jain
@ 2022-03-09  2:58   ` Qu Wenruo
  2022-03-09  7:31     ` Anand Jain
  0 siblings, 1 reply; 5+ messages in thread
From: Qu Wenruo @ 2022-03-09  2:58 UTC (permalink / raw)
  To: Anand Jain, Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics



On 2022/3/9 10:12, Anand Jain wrote:
> On 08/03/2022 13:36, Qu Wenruo wrote:
>> [BUG]
>> There is a report that a btrfs has a bad super block num devices.
>>
>> This makes btrfs to reject the fs completely.
>>
>>    BTRFS error (device sdd3): super_num_devices 3 mismatch with 
>> num_devices 2 found here
>>    BTRFS error (device sdd3): failed to read chunk tree: -22
>>    BTRFS error (device sdd3): open_ctree failed
>>
>> [CAUSE]
>> During btrfs device removal, chunk tree and super block num devs are
>> updated in two different transactions:
>>
>>    btrfs_rm_device()
>>    |- btrfs_rm_dev_item(device)
>>    |  |- trans = btrfs_start_transaction()
>>    |  |  Now we got transaction X
>>    |  |
>>    |  |- btrfs_del_item()
>>    |  |  Now device item is removed from chunk tree
>>    |  |
>>    |  |- btrfs_commit_transaction()
>>    |     Transaction X got committed, super num devs untouched,
>>    |     but device item removed from chunk tree.
>>    |     (AKA, super num devs is already incorrect)
>>    |
>>    |- cur_devices->num_devices--;
>>    |- cur_devices->total_devices--;
>>    |- btrfs_set_super_num_devices()
>>       All those operations are not in transaction X, thus it will
>>       only be written back to disk in next transaction.
>>
>> So after the transaction X in btrfs_rm_dev_item() committed, but before
>> transaction X+1 (which can be minutes away), a power loss happen, then
>> we got the super num mismatch.
>>
>> [FIX]
>> Instead of starting and committing a transaction inside
>> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
>> pass it to btrfs_rm_dev_item().
>>
>> And only commit the transaction after everything is done.
>>
>> Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
>> Link: 
>> https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ 
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>   fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
>>   1 file changed, 28 insertions(+), 37 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 57a754b33f10..6115c302f4ae 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char 
>> *device_path)
>>       path_put(&path);
>>   }
>> -static int btrfs_rm_dev_item(struct btrfs_device *device)
>> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
>> +                 struct btrfs_device *device)
>>   {
>>       struct btrfs_root *root = device->fs_info->chunk_root;
>>       int ret;
>>       struct btrfs_path *path;
>>       struct btrfs_key key;
>> -    struct btrfs_trans_handle *trans;
>>       path = btrfs_alloc_path();
>>       if (!path)
>>           return -ENOMEM;
>> -    trans = btrfs_start_transaction(root, 0);
>> -    if (IS_ERR(trans)) {
>> -        btrfs_free_path(path);
>> -        return PTR_ERR(trans);
>> -    }
>>       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>>       key.type = BTRFS_DEV_ITEM_KEY;
>>       key.offset = device->devid;
>> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct 
>> btrfs_device *device)
>>       if (ret) {
>>           if (ret > 0)
>>               ret = -ENOENT;
>> -        btrfs_abort_transaction(trans, ret);
>> -        btrfs_end_transaction(trans);
>>           goto out;
>>       }
>>       ret = btrfs_del_item(trans, root, path);
>> -    if (ret) {
>> -        btrfs_abort_transaction(trans, ret);
>> -        btrfs_end_transaction(trans);
>> -    }
>> -
>>   out:
>>       btrfs_free_path(path);
>> -    if (!ret)
>> -        ret = btrfs_commit_transaction(trans);
>>       return ret;
>>   }
>> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>               struct btrfs_dev_lookup_args *args,
>>               struct block_device **bdev, fmode_t *mode)
>>   {
>> +    struct btrfs_trans_handle *trans;
>>       struct btrfs_device *device;
>>       struct btrfs_fs_devices *cur_devices;
>>       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>       ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>>       if (ret)
>> -        goto out;
>> +        return ret;
>>       device = btrfs_find_device(fs_info->fs_devices, args);
>>       if (!device) {
>> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info 
>> *fs_info,
>>               ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
>>           else
>>               ret = -ENOENT;
>> -        goto out;
>> +        return ret;
>>       }
>>       if (btrfs_pinned_by_swapfile(fs_info, device)) {
>>           btrfs_warn_in_rcu(fs_info,
>>             "cannot remove device %s (devid %llu) due to active 
>> swapfile",
>>                     rcu_str_deref(device->name), device->devid);
>> -        ret = -ETXTBSY;
>> -        goto out;
>> +        return -ETXTBSY;
>>       }
>> -    if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
>> -        ret = BTRFS_ERROR_DEV_TGT_REPLACE;
>> -        goto out;
>> -    }
>> +    if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
>> +        return BTRFS_ERROR_DEV_TGT_REPLACE;
>>       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>> -        fs_info->fs_devices->rw_devices == 1) {
>> -        ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
>> -        goto out;
>> -    }
>> +        fs_info->fs_devices->rw_devices == 1)
>> +        return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>>       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
>>           mutex_lock(&fs_info->chunk_mutex);
>> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info 
>> *fs_info,
>>       if (ret)
>>           goto error_undo;
>> -    /*
>> -     * TODO: the superblock still includes this device in its 
>> num_devices
>> -     * counter although write_all_supers() is not locked out. This
>> -     * could give a filesystem state which requires a degraded mount.
>> -     */
>> -    ret = btrfs_rm_dev_item(device);
>> -    if (ret)
>> +    trans = btrfs_start_transaction(fs_info->chunk_root, 0);
>> +    if (IS_ERR(trans)) {
>> +        ret = PTR_ERR(trans);
>>           goto error_undo;
>> +    }
>> +
>> +    ret = btrfs_rm_dev_item(trans, device);
>> +    if (ret) {
>> +        /* Any error in dev item removal is critical */
>> +        btrfs_crit(fs_info,
>> +               "failed to remove device item for devid %llu: %d",
>> +               device->devid, ret);
>> +        btrfs_abort_transaction(trans, ret);
>> +        btrfs_end_transaction(trans);
>> +        return ret;
> 
>   Missed error_undo part of the undo here.

Nope, that's exactly as expected.

We abort the transaction, thus nothing is committed and there is no
need to undo.

In fact, after the btrfs_rm_dev_item() call, there is no real way to
roll back the delete.

Thanks,
Qu
> 
> Thanks, Anand
> 
>> +    }
>>       clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>>       btrfs_scrub_cancel_dev(device);
>> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>           free_fs_devices(cur_devices);
>>       }
>> -out:
>> +    ret = btrfs_commit_transaction(trans);
>> +
>>       return ret;
>>   error_undo:
>> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>           device->fs_devices->rw_devices++;
>>           mutex_unlock(&fs_info->chunk_mutex);
>>       }
>> -    goto out;
>> +    return ret;
>>   }
>>   void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
  2022-03-09  2:58   ` Qu Wenruo
@ 2022-03-09  7:31     ` Anand Jain
  0 siblings, 0 replies; 5+ messages in thread
From: Anand Jain @ 2022-03-09  7:31 UTC (permalink / raw)
  To: Qu Wenruo, Qu Wenruo, linux-btrfs; +Cc: Luca Béla Palkovics



On 09/03/2022 10:58, Qu Wenruo wrote:
> 
> 
> On 2022/3/9 10:12, Anand Jain wrote:
>> On 08/03/2022 13:36, Qu Wenruo wrote:
>>> [BUG]
>>> There is a report that a btrfs has a bad super block num devices.
>>>
>>> This makes btrfs to reject the fs completely.
>>>
>>>    BTRFS error (device sdd3): super_num_devices 3 mismatch with 
>>> num_devices 2 found here
>>>    BTRFS error (device sdd3): failed to read chunk tree: -22
>>>    BTRFS error (device sdd3): open_ctree failed
>>>
>>> [CAUSE]
>>> During btrfs device removal, chunk tree and super block num devs are
>>> updated in two different transactions:
>>>
>>>    btrfs_rm_device()
>>>    |- btrfs_rm_dev_item(device)
>>>    |  |- trans = btrfs_start_transaction()
>>>    |  |  Now we got transaction X
>>>    |  |
>>>    |  |- btrfs_del_item()
>>>    |  |  Now device item is removed from chunk tree
>>>    |  |
>>>    |  |- btrfs_commit_transaction()
>>>    |     Transaction X got committed, super num devs untouched,
>>>    |     but device item removed from chunk tree.
>>>    |     (AKA, super num devs is already incorrect)
>>>    |
>>>    |- cur_devices->num_devices--;
>>>    |- cur_devices->total_devices--;
>>>    |- btrfs_set_super_num_devices()
>>>       All those operations are not in transaction X, thus it will
>>>       only be written back to disk in next transaction.
>>>
>>> So after the transaction X in btrfs_rm_dev_item() committed, but before
>>> transaction X+1 (which can be minutes away), a power loss happen, then
>>> we got the super num mismatch.
>>>
>>> [FIX]
>>> Instead of starting and committing a transaction inside
>>> btrfs_rm_dev_item(), start a transaction in side btrfs_rm_device() and
>>> pass it to btrfs_rm_dev_item().
>>>
>>> And only commit the transaction after everything is done.
>>>
>>> Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
>>> Link: 
>>> https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ 
>>>
>>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>>> ---
>>>   fs/btrfs/volumes.c | 65 ++++++++++++++++++++--------------------------
>>>   1 file changed, 28 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>>> index 57a754b33f10..6115c302f4ae 100644
>>> --- a/fs/btrfs/volumes.c
>>> +++ b/fs/btrfs/volumes.c
>>> @@ -1896,23 +1896,18 @@ static void update_dev_time(const char 
>>> *device_path)
>>>       path_put(&path);
>>>   }
>>> -static int btrfs_rm_dev_item(struct btrfs_device *device)
>>> +static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
>>> +                 struct btrfs_device *device)
>>>   {
>>>       struct btrfs_root *root = device->fs_info->chunk_root;
>>>       int ret;
>>>       struct btrfs_path *path;
>>>       struct btrfs_key key;
>>> -    struct btrfs_trans_handle *trans;
>>>       path = btrfs_alloc_path();
>>>       if (!path)
>>>           return -ENOMEM;
>>> -    trans = btrfs_start_transaction(root, 0);
>>> -    if (IS_ERR(trans)) {
>>> -        btrfs_free_path(path);
>>> -        return PTR_ERR(trans);
>>> -    }
>>>       key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
>>>       key.type = BTRFS_DEV_ITEM_KEY;
>>>       key.offset = device->devid;
>>> @@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct 
>>> btrfs_device *device)
>>>       if (ret) {
>>>           if (ret > 0)
>>>               ret = -ENOENT;
>>> -        btrfs_abort_transaction(trans, ret);
>>> -        btrfs_end_transaction(trans);
>>>           goto out;
>>>       }
>>>       ret = btrfs_del_item(trans, root, path);
>>> -    if (ret) {
>>> -        btrfs_abort_transaction(trans, ret);
>>> -        btrfs_end_transaction(trans);
>>> -    }
>>> -
>>>   out:
>>>       btrfs_free_path(path);
>>> -    if (!ret)
>>> -        ret = btrfs_commit_transaction(trans);
>>>       return ret;
>>>   }
>>> @@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>>               struct btrfs_dev_lookup_args *args,
>>>               struct block_device **bdev, fmode_t *mode)
>>>   {
>>> +    struct btrfs_trans_handle *trans;
>>>       struct btrfs_device *device;
>>>       struct btrfs_fs_devices *cur_devices;
>>>       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>>> @@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>>       ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
>>>       if (ret)
>>> -        goto out;
>>> +        return ret;
>>>       device = btrfs_find_device(fs_info->fs_devices, args);
>>>       if (!device) {
>>> @@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info 
>>> *fs_info,
>>>               ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
>>>           else
>>>               ret = -ENOENT;
>>> -        goto out;
>>> +        return ret;
>>>       }
>>>       if (btrfs_pinned_by_swapfile(fs_info, device)) {
>>>           btrfs_warn_in_rcu(fs_info,
>>>             "cannot remove device %s (devid %llu) due to active 
>>> swapfile",
>>>                     rcu_str_deref(device->name), device->devid);
>>> -        ret = -ETXTBSY;
>>> -        goto out;
>>> +        return -ETXTBSY;
>>>       }
>>> -    if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
>>> -        ret = BTRFS_ERROR_DEV_TGT_REPLACE;
>>> -        goto out;
>>> -    }
>>> +    if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
>>> +        return BTRFS_ERROR_DEV_TGT_REPLACE;
>>>       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
>>> -        fs_info->fs_devices->rw_devices == 1) {
>>> -        ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
>>> -        goto out;
>>> -    }
>>> +        fs_info->fs_devices->rw_devices == 1)
>>> +        return BTRFS_ERROR_DEV_ONLY_WRITABLE;
>>>       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
>>>           mutex_lock(&fs_info->chunk_mutex);
>>> @@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info 
>>> *fs_info,
>>>       if (ret)
>>>           goto error_undo;
>>> -    /*
>>> -     * TODO: the superblock still includes this device in its 
>>> num_devices
>>> -     * counter although write_all_supers() is not locked out. This
>>> -     * could give a filesystem state which requires a degraded mount.
>>> -     */
>>> -    ret = btrfs_rm_dev_item(device);
>>> -    if (ret)
>>> +    trans = btrfs_start_transaction(fs_info->chunk_root, 0);
>>> +    if (IS_ERR(trans)) {
>>> +        ret = PTR_ERR(trans);
>>>           goto error_undo;
>>> +    }
>>> +
>>> +    ret = btrfs_rm_dev_item(trans, device);
>>> +    if (ret) {
>>> +        /* Any error in dev item removal is critical */
>>> +        btrfs_crit(fs_info,
>>> +               "failed to remove device item for devid %llu: %d",
>>> +               device->devid, ret);
>>> +        btrfs_abort_transaction(trans, ret);
>>> +        btrfs_end_transaction(trans);
>>> +        return ret;
>>
>>   Missed error_undo part of the undo here.
> 
> Nope, that's exactly expected.
> 


> We abort transaction, thus nothing committed, no need to undo.

My concern is that device->fs_devices->rw_devices
   is not equal to
device->fs_devices->num_devices
  and the fs is read-only at this stage.

I am a bit nervous about whether closing the devices would be ok.
But it looks ok.
Anyway, after an unmount and mount cycle,
rw_devices == num_devices again. But the device will have zero
disk_total_bytes, which is fine. The user can try to remove the device again.


Reviewed-by: Anand Jain <anand.jain@oracle.com>



Thanks, Anand




> In fact, after the btrfs_rm_dev_item() call, there is no real way to 
> rollback the delete.





> Thanks,
> Qu
>>
>> Thanks, Anand
>>
>>> +    }
>>>       clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
>>>       btrfs_scrub_cancel_dev(device);
>>> @@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>>           free_fs_devices(cur_devices);
>>>       }
>>> -out:
>>> +    ret = btrfs_commit_transaction(trans);
>>> +
>>>       return ret;
>>>   error_undo:
>>> @@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
>>>           device->fs_devices->rw_devices++;
>>>           mutex_unlock(&fs_info->chunk_mutex);
>>>       }
>>> -    goto out;
>>> +    return ret;
>>>   }
>>>   void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
>>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction
  2022-03-08  5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
  2022-03-09  2:12 ` Anand Jain
@ 2022-03-14 20:03 ` David Sterba
  1 sibling, 0 replies; 5+ messages in thread
From: David Sterba @ 2022-03-14 20:03 UTC (permalink / raw)
  To: Qu Wenruo; +Cc: linux-btrfs, Luca Béla Palkovics

On Tue, Mar 08, 2022 at 01:36:38PM +0800, Qu Wenruo wrote:
> [BUG]
> There is a report that a btrfs has a bad super block num devices.

People have reported this on IRC in the past too. In some cases it was a
report with two devices expected but one found and "I have never added
nor removed a device on this filesystem", so it was a bit mysterious.
The split update over the transaction is otherwise a clear cause.
Added to misc-next, thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-03-14 20:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-08  5:36 [PATCH] btrfs: make device item removal and super block num devices update happen in the same transaction Qu Wenruo
2022-03-09  2:12 ` Anand Jain
2022-03-09  2:58   ` Qu Wenruo
2022-03-09  7:31     ` Anand Jain
2022-03-14 20:03 ` David Sterba

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.