linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RFC] btrfs/163: replace sprout instead of seed
@ 2020-08-21 13:15 ` Anand Jain
  2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
  2020-08-30 14:41   ` [PATCH] fstests: btrfs/163: replace sprout instead of seed Anand Jain
  0 siblings, 2 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-21 13:15 UTC (permalink / raw)
  To: linux-btrfs; +Cc: josef

Make this test case inline with the kernel patch [1] changes
[1]
   btrfs: fail replace of seed device

So use the sprout device as the replace target instead of the seed device.
As because the replace seed is not supported any more.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
I will send this to fstests ML once RFC is cleared.

 tests/btrfs/163     | 21 ++++++++++++++++-----
 tests/btrfs/163.out |  5 ++++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/tests/btrfs/163 b/tests/btrfs/163
index 24c725afb6b9..354d88502d47 100755
--- a/tests/btrfs/163
+++ b/tests/btrfs/163
@@ -4,11 +4,15 @@
 #
 # FS QA Test 163
 #
-# Test case to verify that a seed device can be replaced
+# Test case to verify that a sprouted device can be replaced
 #  Create a seed device
 #  Create a sprout device
 #  Remount RW
-#  Run device replace on the seed device
+#  Run device replace on the sprout device
+#
+# Depends on the kernel patch
+#   btrfs: fail replace of seed device
+
 seq=`basename $0`
 seqres=$RESULT_DIR/$seq
 echo "QA output created by $seq"
@@ -39,6 +43,7 @@ _supported_fs btrfs
 _supported_os Linux
 _require_command "$BTRFS_TUNE_PROG" btrfstune
 _require_scratch_dev_pool 3
+_require_btrfs_forget_or_module_loadable
 
 _scratch_dev_pool_get 3
 
@@ -52,7 +57,7 @@ create_seed()
 	run_check _mount $dev_seed $SCRATCH_MNT
 	$XFS_IO_PROG -f -d -c "pwrite -S 0xab 0 4M" $SCRATCH_MNT/foobar >\
 		/dev/null
-	echo -- gloden --
+	echo -- golden --
 	od -x $SCRATCH_MNT/foobar
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
@@ -64,22 +69,28 @@ add_sprout()
 {
 	_run_btrfs_util_prog device add -f $dev_sprout $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
+	_mount -o remount,rw $dev_sprout $SCRATCH_MNT
+	$XFS_IO_PROG -f -d -c "pwrite -S 0xcd 0 4M" $SCRATCH_MNT/foobar2 >\
+		/dev/null
 }
 
 replace_seed()
 {
-	_run_btrfs_util_prog replace start -fB $dev_seed $dev_replace_tgt $SCRATCH_MNT
+	_run_btrfs_util_prog replace start -fB $dev_sprout $dev_replace_tgt $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
-	run_check _mount $dev_replace_tgt $SCRATCH_MNT
+	_btrfs_forget_or_module_reload
+	run_check _mount -o device=$dev_seed $dev_replace_tgt $SCRATCH_MNT
 	echo -- sprout --
 	od -x $SCRATCH_MNT/foobar
+	od -x $SCRATCH_MNT/foobar2
 	_scratch_unmount
 
 }
 
 seed_is_mountable()
 {
+	_btrfs_forget_or_module_reload
 	run_check _mount $dev_seed $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
diff --git a/tests/btrfs/163.out b/tests/btrfs/163.out
index 91f6f5b6f48a..351ef7b040b2 100644
--- a/tests/btrfs/163.out
+++ b/tests/btrfs/163.out
@@ -1,5 +1,5 @@
 QA output created by 163
--- gloden --
+-- golden --
 0000000 abab abab abab abab abab abab abab abab
 *
 20000000
@@ -7,3 +7,6 @@ QA output created by 163
 0000000 abab abab abab abab abab abab abab abab
 *
 20000000
+0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
+*
+20000000
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device
  2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
@ 2020-08-21 13:15   ` Anand Jain
  2020-08-21 13:15     ` [PATCH RFC 2/2] btrfs: fix replace of " Anand Jain
                       ` (2 more replies)
  2020-08-30 14:41   ` [PATCH] fstests: btrfs/163: replace sprout instead of seed Anand Jain
  1 sibling, 3 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-21 13:15 UTC (permalink / raw)
  To: linux-btrfs; +Cc: josef

The following test case leads to null kobject-being-freed error.

 mount seed /mnt
 add sprout to /mnt
 umount /mnt
 mount sprout to /mnt
 delete seed

 kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
 WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
 RIP: 0010:kobject_put+0x80/0x350
 ::
 Call Trace:
 btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
 btrfs_rm_device.cold+0xa8/0x298 [btrfs]
 btrfs_ioctl+0x206c/0x22a0 [btrfs]
 ksys_ioctl+0xe2/0x140
 __x64_sys_ioctl+0x1e/0x29
 do_syscall_64+0x96/0x150
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f4047c6288b
 ::

This is because, at the end of the seed device-delete, we try to remove
the seed's devid sysfs entry. But for the seed devices under the sprout
fs, we don't initialize the devid kobject yet. So this patch initializes
the seed device devid kobject and the device link in the sysfs. This takes
care of the Warning.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 88fd4ce937b8..85403fc3d5c7 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1154,20 +1154,20 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 /* when one_device is NULL, it removes all device links */
 
 int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-		struct btrfs_device *one_device)
+				   struct btrfs_device *one_device)
 {
 	struct hd_struct *disk;
 	struct kobject *disk_kobj;
+	struct kobject *devices_kobj = fs_devices->devices_kobj;
 
-	if (!fs_devices->devices_kobj)
+	if (!devices_kobj)
 		return -EINVAL;
 
 	if (one_device) {
 		if (one_device->bdev) {
 			disk = one_device->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
+			sysfs_remove_link(devices_kobj, disk_kobj->name);
 		}
 
 		kobject_del(&one_device->devid_kobj);
@@ -1178,19 +1178,23 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
 		return 0;
 	}
 
+again:
 	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
 
 		if (one_device->bdev) {
 			disk = one_device->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
+			sysfs_remove_link(devices_kobj, disk_kobj->name);
 		}
 		kobject_del(&one_device->devid_kobj);
 		kobject_put(&one_device->devid_kobj);
 
 		wait_for_completion(&one_device->kobj_unregister);
 	}
+	while (fs_devices->seed) {
+		fs_devices = fs_devices->seed;
+		goto again;
+	}
 
 	return 0;
 }
@@ -1279,8 +1283,11 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
 	int error = 0;
 	struct btrfs_device *dev;
 	unsigned int nofs_flag;
+	struct kobject *devices_kobj = fs_devices->devices_kobj;
+	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
 
 	nofs_flag = memalloc_nofs_save();
+again:
 	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 
 		if (one_device && one_device != dev)
@@ -1293,21 +1300,24 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
 			disk = dev->bdev->bd_part;
 			disk_kobj = &part_to_dev(disk)->kobj;
 
-			error = sysfs_create_link(fs_devices->devices_kobj,
-						  disk_kobj, disk_kobj->name);
+			error = sysfs_create_link(devices_kobj, disk_kobj,
+						  disk_kobj->name);
 			if (error)
 				break;
 		}
 
 		init_completion(&dev->kobj_unregister);
 		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
-					     fs_devices->devinfo_kobj, "%llu",
-					     dev->devid);
+					     devinfo_kobj, "%llu", dev->devid);
 		if (error) {
 			kobject_put(&dev->devid_kobj);
 			break;
 		}
 	}
+	while(fs_devices->seed) {
+		fs_devices = fs_devices->seed;
+		goto again;
+	}
 	memalloc_nofs_restore(nofs_flag);
 
 	return error;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH RFC 2/2] btrfs: fix replace of seed device
  2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
@ 2020-08-21 13:15     ` Anand Jain
  2020-08-21 14:38       ` Josef Bacik
  2020-08-21 14:36     ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for " Josef Bacik
  2020-08-29 11:44     ` Anand Jain
  2 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-21 13:15 UTC (permalink / raw)
  To: linux-btrfs; +Cc: josef

If you replace a seed device in a sprouted fs, it appears to have
successfully replaced the seed device, but if you look closely, it didn't.

Here is an example.

mkfs.btrfs -fq /dev/sda
btrfstune -S1 /dev/sda
mount /dev/sda /btrfs
btrfs dev add /dev/sdb /btrfs
umount /btrfs; btrfs dev scan --forget
mount -o device=/dev/sda /dev/sdb /btrfs
btrfs rep start -f /dev/sda /dev/sdc /btrfs; echo $?
0

  BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc started
  BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc finished

btrfs fi show
Label: none  uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f
	Total devices 2 FS bytes used 256.00KiB
	devid    1 size 3.00GiB used 520.00MiB path /dev/sdc
	devid    2 size 3.00GiB used 896.00MiB path /dev/sdb

Label: none  uuid: 10bd3202-0415-43af-96a8-d5409f310a7e
	Total devices 1 FS bytes used 128.00KiB
	devid    1 size 3.00GiB used 536.00MiB path /dev/sda

So as per the replace start command and kernel log replace was successful.

Now let's try to clean mount.

umount /btrfs;  btrfs dev scan --forget

mount -o device=/dev/sdc /dev/sdb /btrfs
mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error.

[  636.157517] BTRFS error (device sdc): failed to read chunk tree: -2
[  636.180177] BTRFS error (device sdc): open_ctree failed

That's because per dev items it is still looking for the original seed
device.

btrfs in dump-tree -d /dev/sdb

	item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98
		devid 1 total_bytes 3221225472 bytes_used 545259520
		io_align 4096 io_width 4096 sector_size 4096 type 0
		generation 6 start_offset 0 dev_group 0
		seek_speed 0 bandwidth 0
		uuid 59368f50-9af2-4b17-91da-8a783cc418d4  <--- seed uuid
		fsid 10bd3202-0415-43af-96a8-d5409f310a7e  <--- seed fsid
	item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98
		devid 2 total_bytes 3221225472 bytes_used 939524096
		io_align 4096 io_width 4096 sector_size 4096 type 0
		generation 0 start_offset 0 dev_group 0
		seek_speed 0 bandwidth 0
		uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a  <- sprout uuid
		fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid

But the replaced target has the following uuid+fsid in its superblock
which doesn't match with the expected uuid+fsid in its devitem.

btrfs in dump-super /dev/sdc | egrep '^generation|dev_item.uuid|dev_item.fsid|devid'
generation	20
dev_item.uuid	59368f50-9af2-4b17-91da-8a783cc418d4
dev_item.fsid	ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match]
dev_item.devid	1

So if you provide the original seed device the mount shall be successful.
Which so long happening in the test case btrfs/163.

btrfs dev scan --forget
mount -o device=/dev/sda /dev/sdb /btrfs

Fix in this patch:
Make it as you can't replace a seed device, you can only add a new device
and then delete the seed device. If replace is attempted then returns -EINVAL.
As in the below changes.

Another possible fix:
If we want to keep the ability to replace for seed-device, then we could
update the fsid of the replace-target blocks. And after replacement, you
have seed device but with sprout fsid. But then I don't know what is the
point and if there is any such use case.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---

To test this patch you need the patch 1/2.
Tested with fstests group volume

 fs/btrfs/dev-replace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1d09bd3101e2..1aff10337b90 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -225,7 +225,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 
 	*device_out = NULL;
-	if (fs_info->fs_devices->seeding) {
+	if (srcdev->fs_devices->seeding) {
 		btrfs_err(fs_info, "the filesystem is a seed filesystem!");
 		return -EINVAL;
 	}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device
  2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
  2020-08-21 13:15     ` [PATCH RFC 2/2] btrfs: fix replace of " Anand Jain
@ 2020-08-21 14:36     ` Josef Bacik
  2020-08-23 13:05       ` Anand Jain
  2020-08-29 11:44     ` Anand Jain
  2 siblings, 1 reply; 38+ messages in thread
From: Josef Bacik @ 2020-08-21 14:36 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs

On 8/21/20 9:15 AM, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>   fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 88fd4ce937b8..85403fc3d5c7 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1154,20 +1154,20 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>   /* when one_device is NULL, it removes all device links */
>   
>   int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +				   struct btrfs_device *one_device)
>   {
>   	struct hd_struct *disk;
>   	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
>   
> -	if (!fs_devices->devices_kobj)
> +	if (!devices_kobj)
>   		return -EINVAL;
>   
>   	if (one_device) {
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   
>   		kobject_del(&one_device->devid_kobj);
> @@ -1178,19 +1178,23 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>   		return 0;
>   	}
>   
> +again:
>   	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>   
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   		kobject_del(&one_device->devid_kobj);
>   		kobject_put(&one_device->devid_kobj);
>   
>   		wait_for_completion(&one_device->kobj_unregister);
>   	}
> +	while (fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   
>   	return 0;
>   }
> @@ -1279,8 +1283,11 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   	int error = 0;
>   	struct btrfs_device *dev;
>   	unsigned int nofs_flag;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
> +	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
>   
>   	nofs_flag = memalloc_nofs_save();
> +again:
>   	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>   
>   		if (one_device && one_device != dev)
> @@ -1293,21 +1300,24 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   			disk = dev->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
>   
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> +			error = sysfs_create_link(devices_kobj, disk_kobj,
> +						  disk_kobj->name);
>   			if (error)
>   				break;
>   		}
>   
>   		init_completion(&dev->kobj_unregister);
>   		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> +					     devinfo_kobj, "%llu", dev->devid);
>   		if (error) {
>   			kobject_put(&dev->devid_kobj);
>   			break;
>   		}
>   	}
> +	while(fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   	memalloc_nofs_restore(nofs_flag);
>   
>   	return error;
> 

So now we're using the main fs_devices->devices_kobj, which is the main 
fs_devices with fs_devices->seed being the seed fs_devices.  This is 
fine, except when we actually mount a seed device, and in that case we 
have fs_devices as the seed devices being used, and then if we add a 
device we'll actually swap in the new fs_devices for the main 
fs_devices, and we have the seed devices with the actual devices_kobj 
that we used set in fs_devices->seed, and thus we'll leak the sysfs 
objects for the seed devices.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC 2/2] btrfs: fix replace of seed device
  2020-08-21 13:15     ` [PATCH RFC 2/2] btrfs: fix replace of " Anand Jain
@ 2020-08-21 14:38       ` Josef Bacik
  2020-08-23 15:05         ` Anand Jain
  0 siblings, 1 reply; 38+ messages in thread
From: Josef Bacik @ 2020-08-21 14:38 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: boris

On 8/21/20 9:15 AM, Anand Jain wrote:
> If you replace a seed device in a sprouted fs, it appears to have
> successfully replaced the seed device, but if you look closely, it didn't.
> 
> Here is an example.
> 
> mkfs.btrfs -fq /dev/sda
> btrfstune -S1 /dev/sda
> mount /dev/sda /btrfs
> btrfs dev add /dev/sdb /btrfs
> umount /btrfs; btrfs dev scan --forget
> mount -o device=/dev/sda /dev/sdb /btrfs
> btrfs rep start -f /dev/sda /dev/sdc /btrfs; echo $?
> 0
> 
>    BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc started
>    BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc finished
> 
> btrfs fi show
> Label: none  uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f
> 	Total devices 2 FS bytes used 256.00KiB
> 	devid    1 size 3.00GiB used 520.00MiB path /dev/sdc
> 	devid    2 size 3.00GiB used 896.00MiB path /dev/sdb
> 
> Label: none  uuid: 10bd3202-0415-43af-96a8-d5409f310a7e
> 	Total devices 1 FS bytes used 128.00KiB
> 	devid    1 size 3.00GiB used 536.00MiB path /dev/sda
> 
> So as per the replace start command and kernel log replace was successful.
> 
> Now let's try to clean mount.
> 
> umount /btrfs;  btrfs dev scan --forget
> 
> mount -o device=/dev/sdc /dev/sdb /btrfs
> mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error.
> 
> [  636.157517] BTRFS error (device sdc): failed to read chunk tree: -2
> [  636.180177] BTRFS error (device sdc): open_ctree failed
> 
> That's because per dev items it is still looking for the original seed
> device.
> 
> btrfs in dump-tree -d /dev/sdb
> 
> 	item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98
> 		devid 1 total_bytes 3221225472 bytes_used 545259520
> 		io_align 4096 io_width 4096 sector_size 4096 type 0
> 		generation 6 start_offset 0 dev_group 0
> 		seek_speed 0 bandwidth 0
> 		uuid 59368f50-9af2-4b17-91da-8a783cc418d4  <--- seed uuid
> 		fsid 10bd3202-0415-43af-96a8-d5409f310a7e  <--- seed fsid
> 	item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98
> 		devid 2 total_bytes 3221225472 bytes_used 939524096
> 		io_align 4096 io_width 4096 sector_size 4096 type 0
> 		generation 0 start_offset 0 dev_group 0
> 		seek_speed 0 bandwidth 0
> 		uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a  <- sprout uuid
> 		fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid
> 
> But the replaced target has the following uuid+fsid in its superblock
> which doesn't match with the expected uuid+fsid in its devitem.
> 
> btrfs in dump-super /dev/sdc | egrep '^generation|dev_item.uuid|dev_item.fsid|devid'
> generation	20
> dev_item.uuid	59368f50-9af2-4b17-91da-8a783cc418d4
> dev_item.fsid	ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match]
> dev_item.devid	1
> 
> So if you provide the original seed device the mount shall be successful.
> Which so long happening in the test case btrfs/163.
> 
> btrfs dev scan --forget
> mount -o device=/dev/sda /dev/sdb /btrfs
> 
> Fix in this patch:
> Make it as you can't replace a seed device, you can only add a new device
> and then delete the seed device. If replace is attempted then returns -EINVAL.
> As in the below changes.
> 
> Another possible fix:
> If we want to keep the ability to replace for seed-device, then we could
> update the fsid of the replace-target blocks. And after replacement, you
> have seed device but with sprout fsid. But then I don't know what is the
> point and if there is any such use case.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---

This is something Boris dug into until I told him to drop it.  I _think_ 
I'm ok with this, but really what I'd rather do is restripe the whole fs 
with the new UUID for this case.  Where did that redo the UUID work go? 
Thanks,

Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device
  2020-08-21 14:36     ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for " Josef Bacik
@ 2020-08-23 13:05       ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-23 13:05 UTC (permalink / raw)
  To: Josef Bacik, linux-btrfs


> 
> So now we're using the main fs_devices->devices_kobj, which is the main 
> fs_devices with fs_devices->seed being the seed fs_devices.  This is 
> fine, except when we actually mount a seed device, and in that case we 
> have fs_devices as the seed devices being used, and then if we add a 
> device we'll actually swap in the new fs_devices for the main 
> fs_devices, and we have the seed devices with the actual devices_kobj 
> that we used set in fs_devices->seed, and thus we'll leak the sysfs 
> objects for the seed devices.  Thanks,

Do you mean leaking the devinfo_kobj instead of devices_kobj? If so,
then yes, and this patch fixed it as well (I just found out).

Otherwise, no, there isn't devices_kobj leak.  We make sure only mounted
fsid has the devices_kobj initialized. So during sprouting- the
devices_kobj remains with the fs_info->fs_devices, as we move the seed
devices below the seed_devices.

static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
::
  list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
  synchronize_rcu);
  list_for_each_entry(device, &seed_devices->devices, dev_list)
  device->fs_devices = seed_devices;

And during unmount, we clean up the dev links and devices_kobj.

close_ctree()
   btrfs_sysfs_remove_mounted()
   btrfs_sysfs_remove_fsid()


Anand


> Josef



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH RFC 2/2] btrfs: fix replace of seed device
  2020-08-21 14:38       ` Josef Bacik
@ 2020-08-23 15:05         ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-23 15:05 UTC (permalink / raw)
  To: Josef Bacik, linux-btrfs; +Cc: boris



On 21/8/20 10:38 pm, Josef Bacik wrote:
> On 8/21/20 9:15 AM, Anand Jain wrote:
>> If you replace a seed device in a sprouted fs, it appears to have
>> successfully replaced the seed device, but if you look closely, it 
>> didn't.
>>
>> Here is an example.
>>
>> mkfs.btrfs -fq /dev/sda
>> btrfstune -S1 /dev/sda
>> mount /dev/sda /btrfs
>> btrfs dev add /dev/sdb /btrfs
>> umount /btrfs; btrfs dev scan --forget
>> mount -o device=/dev/sda /dev/sdb /btrfs
>> btrfs rep start -f /dev/sda /dev/sdc /btrfs; echo $?
>> 0
>>
>>    BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to 
>> /dev/sdc started
>>    BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to 
>> /dev/sdc finished
>>
>> btrfs fi show
>> Label: none  uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f
>>     Total devices 2 FS bytes used 256.00KiB
>>     devid    1 size 3.00GiB used 520.00MiB path /dev/sdc
>>     devid    2 size 3.00GiB used 896.00MiB path /dev/sdb
>>
>> Label: none  uuid: 10bd3202-0415-43af-96a8-d5409f310a7e
>>     Total devices 1 FS bytes used 128.00KiB
>>     devid    1 size 3.00GiB used 536.00MiB path /dev/sda
>>
>> So as per the replace start command and kernel log replace was 
>> successful.
>>
>> Now let's try to clean mount.
>>
>> umount /btrfs;  btrfs dev scan --forget
>>
>> mount -o device=/dev/sdc /dev/sdb /btrfs
>> mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, 
>> missing codepage or helper program, or other error.
>>
>> [  636.157517] BTRFS error (device sdc): failed to read chunk tree: -2
>> [  636.180177] BTRFS error (device sdc): open_ctree failed
>>
>> That's because per dev items it is still looking for the original seed
>> device.
>>
>> btrfs in dump-tree -d /dev/sdb
>>
>>     item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98
>>         devid 1 total_bytes 3221225472 bytes_used 545259520
>>         io_align 4096 io_width 4096 sector_size 4096 type 0
>>         generation 6 start_offset 0 dev_group 0
>>         seek_speed 0 bandwidth 0
>>         uuid 59368f50-9af2-4b17-91da-8a783cc418d4  <--- seed uuid
>>         fsid 10bd3202-0415-43af-96a8-d5409f310a7e  <--- seed fsid
>>     item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98
>>         devid 2 total_bytes 3221225472 bytes_used 939524096
>>         io_align 4096 io_width 4096 sector_size 4096 type 0
>>         generation 0 start_offset 0 dev_group 0
>>         seek_speed 0 bandwidth 0
>>         uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a  <- sprout uuid
>>         fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid
>>
>> But the replaced target has the following uuid+fsid in its superblock
>> which doesn't match with the expected uuid+fsid in its devitem.
>>
>> btrfs in dump-super /dev/sdc | egrep 
>> '^generation|dev_item.uuid|dev_item.fsid|devid'
>> generation    20
>> dev_item.uuid    59368f50-9af2-4b17-91da-8a783cc418d4
>> dev_item.fsid    ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match]
>> dev_item.devid    1
>>
>> So if you provide the original seed device the mount shall be successful.
>> Which so long happening in the test case btrfs/163.
>>
>> btrfs dev scan --forget
>> mount -o device=/dev/sda /dev/sdb /btrfs
>>
>> Fix in this patch:
>> Make it as you can't replace a seed device, you can only add a new device
>> and then delete the seed device. If replace is attempted then returns 
>> -EINVAL.
>> As in the below changes.
>>
>> Another possible fix:
>> If we want to keep the ability to replace for seed-device, then we could
>> update the fsid of the replace-target blocks. And after replacement, you
>> have seed device but with sprout fsid. But then I don't know what is the
>> point and if there is any such use case.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
> 
> This is something Boris dug into until I told him to drop it.  I _think_ 
> I'm ok with this, but really what I'd rather do is restripe the whole fs 
> with the new UUID for this case.  Where did that redo the UUID work go? 

Redo the UUID part is not yet implemented. Perhaps restripe the whole
fs- is another approach, both of these approaches have the same question
unanswered what use case does the replace of seed device solve and would
it be a redundant operation to device add and delete ops.

Thanks, Anand

> Thanks,
> 
> Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device
  2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
  2020-08-21 13:15     ` [PATCH RFC 2/2] btrfs: fix replace of " Anand Jain
  2020-08-21 14:36     ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for " Josef Bacik
@ 2020-08-29 11:44     ` Anand Jain
  2 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-29 11:44 UTC (permalink / raw)
  To: linux-btrfs; +Cc: josef




On 21/8/20 9:15 pm, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>   fs/btrfs/sysfs.c | 30 ++++++++++++++++++++----------
>   1 file changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 88fd4ce937b8..85403fc3d5c7 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1154,20 +1154,20 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>   /* when one_device is NULL, it removes all device links */
>   
>   int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +				   struct btrfs_device *one_device)
>   {
>   	struct hd_struct *disk;
>   	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
>   
> -	if (!fs_devices->devices_kobj)
> +	if (!devices_kobj)
>   		return -EINVAL;
>   
>   	if (one_device) {
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   
>   		kobject_del(&one_device->devid_kobj);
> @@ -1178,19 +1178,23 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>   		return 0;
>   	}
>   
> +again:
>   	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>   
>   		if (one_device->bdev) {
>   			disk = one_device->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> +			sysfs_remove_link(devices_kobj, disk_kobj->name);
>   		}
>   		kobject_del(&one_device->devid_kobj);
>   		kobject_put(&one_device->devid_kobj);
>   
>   		wait_for_completion(&one_device->kobj_unregister);
>   	}
> +	while (fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   
>   	return 0;
>   }
> @@ -1279,8 +1283,11 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   	int error = 0;
>   	struct btrfs_device *dev;
>   	unsigned int nofs_flag;
> +	struct kobject *devices_kobj = fs_devices->devices_kobj;
> +	struct kobject *devinfo_kobj = fs_devices->devinfo_kobj;
>   
>   	nofs_flag = memalloc_nofs_save();
> +again:
>   	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>   
>   		if (one_device && one_device != dev)
> @@ -1293,21 +1300,24 @@ int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>   			disk = dev->bdev->bd_part;
>   			disk_kobj = &part_to_dev(disk)->kobj;
>   
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> +			error = sysfs_create_link(devices_kobj, disk_kobj,
> +						  disk_kobj->name);
>   			if (error)
>   				break;
>   		}
>   
>   		init_completion(&dev->kobj_unregister);
>   		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> +					     devinfo_kobj, "%llu", dev->devid);
>   		if (error) {
>   			kobject_put(&dev->devid_kobj);
>   			break;
>   		}
>   	}
> +	while(fs_devices->seed) {
> +		fs_devices = fs_devices->seed;
> +		goto again;
> +	}
>   	memalloc_nofs_restore(nofs_flag);
>   
>   	return error;
> 


Ping?

Thanks, Anand

^ permalink raw reply	[flat|nested] 38+ messages in thread

* (no subject)
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
  2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
@ 2020-08-30 14:40 ` Anand Jain
  2020-08-30 14:40 ` [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device Anand Jain
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:40 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

Subject: [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups

v2:
Rebased on latest misc-next which contains the seed_list from Nicolas.
This patchset has been sent before as two sets, here it is been merged.

Patches 1/11 fix the null kobject being put, which is observed with kernel
compiled with memory poisoning.
Patches 2/11 and 3/11 are cleanups to maintain better flow in the
functions btrfs_sysfs_add_devices_dir() and btrfs_sysfs_remove_devices_dir().
Patches 4/11 and 5/11 drops the last two users of seed device_list_mutex,
so now they hold main filesystem device_list_mutex. 
Patches [6-10]/11 are cleanups.
Patches 11/11 fix the replacement of seed device in a sprouted. It is safe
to block that because we also block the replace of seed if it isn't a sprouted
device.

This set has passed through fstests/volume.

---- original cover-letter ---------
In a sprouted file system, the seed's fs_devices are cloned and 
listed under the sprout's fs_devices. The fs_info::fs_devices
points to the sprout::fs_devices and various critical operations
like device-delete holds the top-level main device_list_mutex
which is sprout::fs_devices::device_list_mutex and _not_ the seed level
mutex such as sprout::fs_devices::seed::fs_devices::device_list_mutex.

Also all related readers (except for two threads- reada and init_devices_late)
hold the sprout::fs_devices::device_list_mutex too. And those two threads
which are missing to hold the correct lock are being fixed here.

I take the approach to fix the read end instead of fixing the writer end
which are not holding the seed level mutex because to keep things
simple and there isn't much benefit burning extra CPU cycles in going
through the lock/unlock process as we traverse through the
fs_devices::seed fs_devices (for example as in reada and init_devices_late
threads).

The first two patches (1/7, 2/7) fixes the threads using the
seed::device_list_mutex.

And rest of the patches ([3-7]/7) are cleanups and these patches
are independent by themself.

These patchset has been tested with full xfstests btrfs test cases and
found to have no new regressions.


Anand Jain (11):
  btrfs: initialize sysfs devid and device link for seed device
  btrfs: refactor btrfs_sysfs_add_devices_dir
  btrfs: refactor btrfs_sysfs_remove_devices_dir
  btrfs: reada: use sprout device_list_mutex
  btrfs: btrfs_init_devices_late: use sprout device_list_mutex
  btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev
  btrfs: cleanup btrfs_remove_chunk
  btrfs: cleanup btrfs_assign_next_active_device()
  btrfs: cleanup unnecessary goto in open_seed_device
  btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file
    global declare
  btrfs: fix replace of seed device

 fs/btrfs/dev-replace.c |  66 ++++++++----------
 fs/btrfs/reada.c       |   5 +-
 fs/btrfs/sysfs.c       | 151 ++++++++++++++++++++++++-----------------
 fs/btrfs/sysfs.h       |   8 +--
 fs/btrfs/volumes.c     |  40 +++++------
 5 files changed, 143 insertions(+), 127 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
  2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
  2020-08-30 14:40 ` Anand Jain
@ 2020-08-30 14:40 ` Anand Jain
  2020-08-31  9:07   ` Nikolay Borisov
  2020-08-31 16:21   ` Josef Bacik
  2020-08-30 14:40 ` [PATCH 02/11] btrfs: refactor btrfs_sysfs_add_devices_dir Anand Jain
                   ` (9 subsequent siblings)
  12 siblings, 2 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:40 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

The following test case leads to null kobject-being-freed error.

 mount seed /mnt
 add sprout to /mnt
 umount /mnt
 mount sprout to /mnt
 delete seed

 kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
 WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
 RIP: 0010:kobject_put+0x80/0x350
 ::
 Call Trace:
 btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
 btrfs_rm_device.cold+0xa8/0x298 [btrfs]
 btrfs_ioctl+0x206c/0x22a0 [btrfs]
 ksys_ioctl+0xe2/0x140
 __x64_sys_ioctl+0x1e/0x29
 do_syscall_64+0x96/0x150
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f4047c6288b
 ::

This is because, at the end of the seed device-delete, we try to remove
the seed's devid sysfs entry. But for the seed devices under the sprout
fs, we don't initialize the devid kobject yet. So this patch initializes
the seed device devid kobject and the device link in the sysfs. This takes
care of the Warning.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 93 insertions(+), 53 deletions(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 190e59152be5..9b5e58091fae 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-/* when one_device is NULL, it removes all device links */
-
-int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-		struct btrfs_device *one_device)
+static void btrfs_sysfs_remove_device(struct btrfs_device *device)
 {
 	struct hd_struct *disk;
 	struct kobject *disk_kobj;
+	struct kobject *devices_kobj;
 
-	if (!fs_devices->devices_kobj)
-		return -EINVAL;
+	/*
+	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
+	 * fs_info::fs_devices.
+	 */
+	devices_kobj = device->fs_info->fs_devices->devices_kobj;
+	ASSERT(devices_kobj);
 
-	if (one_device) {
-		if (one_device->bdev) {
-			disk = one_device->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
-		}
+	if (device->bdev) {
+		disk = device->bdev->bd_part;
+		disk_kobj = &part_to_dev(disk)->kobj;
+		sysfs_remove_link(devices_kobj, disk_kobj->name);
+	}
 
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+	kobject_del(&device->devid_kobj);
+	kobject_put(&device->devid_kobj);
 
-		wait_for_completion(&one_device->kobj_unregister);
+	wait_for_completion(&device->kobj_unregister);
+}
 
+/* when 2nd argument device is NULL, it removes all devices link */
+int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
+				   struct btrfs_device *device)
+{
+	struct btrfs_fs_devices *seed_fs_devices;
+
+	if (device) {
+		btrfs_sysfs_remove_device(device);
 		return 0;
 	}
 
-	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
-
-		if (one_device->bdev) {
-			disk = one_device->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-			sysfs_remove_link(fs_devices->devices_kobj,
-					  disk_kobj->name);
-		}
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+	list_for_each_entry(device, &fs_devices->devices, dev_list)
+		btrfs_sysfs_remove_device(device);
 
-		wait_for_completion(&one_device->kobj_unregister);
+	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
+		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
+			btrfs_sysfs_remove_device(device);
 	}
 
 	return 0;
@@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = {
 	.release	= btrfs_release_devid_kobj,
 };
 
+static int btrfs_sysfs_add_device(struct btrfs_device *device)
+{
+	int ret;
+	struct kobject *devices_kobj;
+        struct kobject *devinfo_kobj;
+
+	/*
+	 * make sure we use the fs_info::fs_devices to fetch the kobjects
+	 * even for the seed fs_devices
+	 */
+	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
+	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
+	ASSERT(devices_kobj);
+	ASSERT(devinfo_kobj);
+
+	if (device->bdev) {
+		struct hd_struct *disk;
+		struct kobject *disk_kobj;
+
+		disk = device->bdev->bd_part;
+		disk_kobj = &part_to_dev(disk)->kobj;
+
+		ret = sysfs_create_link(devices_kobj, disk_kobj,
+					disk_kobj->name);
+		if (ret) {
+			btrfs_warn(device->fs_info,
+			   "sysfs create device link failed %d devid %llu",
+				   ret, device->devid);
+			return ret;
+		}
+	}
+
+	init_completion(&device->kobj_unregister);
+	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
+				   devinfo_kobj, "%llu", device->devid);
+	if (ret) {
+		kobject_put(&device->devid_kobj);
+		btrfs_warn(device->fs_info,
+			   "sysfs devinfo init failed %d devid %llu",
+			   ret, device->devid);
+	}
+
+	return ret;
+}
+
 int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
-				struct btrfs_device *one_device)
+				struct btrfs_device *device)
 {
-	int error = 0;
-	struct btrfs_device *dev;
+	int ret = 0;
 	unsigned int nofs_flag;
+	struct btrfs_fs_devices *seed_fs_devices;
 
 	nofs_flag = memalloc_nofs_save();
-	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 
-		if (one_device && one_device != dev)
-			continue;
+	if (device)
+		return btrfs_sysfs_add_device(device);
 
-		if (dev->bdev) {
-			struct hd_struct *disk;
-			struct kobject *disk_kobj;
-
-			disk = dev->bdev->bd_part;
-			disk_kobj = &part_to_dev(disk)->kobj;
-
-			error = sysfs_create_link(fs_devices->devices_kobj,
-						  disk_kobj, disk_kobj->name);
-			if (error)
-				break;
-		}
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		ret = btrfs_sysfs_add_device(device);
+		if (ret)
+			goto out;
+	}
 
-		init_completion(&dev->kobj_unregister);
-		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
-					     fs_devices->devinfo_kobj, "%llu",
-					     dev->devid);
-		if (error) {
-			kobject_put(&dev->devid_kobj);
-			break;
+	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
+		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
+			ret = btrfs_sysfs_add_device(device);
+			if (ret)
+				goto out;
 		}
 	}
+
+out:
 	memalloc_nofs_restore(nofs_flag);
 
-	return error;
+	return ret;
 }
 
 void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 02/11] btrfs: refactor btrfs_sysfs_add_devices_dir
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (2 preceding siblings ...)
  2020-08-30 14:40 ` [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device Anand Jain
@ 2020-08-30 14:40 ` Anand Jain
  2020-08-30 14:40 ` [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir Anand Jain
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:40 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

When we add device we need to add a device to the sysfs, so instead of
using the btrfs_sysfs_add_devices_dir() 2nd argument to specify whether
to add a device or all of fs_devices, call the helper function directly
btrfs_sysfs_add_device() and thus make it non static.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/dev-replace.c |  2 +-
 fs/btrfs/sysfs.c       | 11 ++++-------
 fs/btrfs/sysfs.h       |  4 ++--
 fs/btrfs/volumes.c     |  2 +-
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 580e60fe07d0..979b40754cb4 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -512,7 +512,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 	atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
 	up_write(&dev_replace->rwsem);
 
-	ret = btrfs_sysfs_add_devices_dir(tgt_device->fs_devices, tgt_device);
+	ret = btrfs_sysfs_add_device(tgt_device);
 	if (ret)
 		btrfs_err(fs_info, "kobj add dev failed %d", ret);
 
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 9b5e58091fae..afc2e2ab4d27 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1274,7 +1274,7 @@ static struct kobj_type devid_ktype = {
 	.release	= btrfs_release_devid_kobj,
 };
 
-static int btrfs_sysfs_add_device(struct btrfs_device *device)
+int btrfs_sysfs_add_device(struct btrfs_device *device)
 {
 	int ret;
 	struct kobject *devices_kobj;
@@ -1319,18 +1319,15 @@ static int btrfs_sysfs_add_device(struct btrfs_device *device)
 	return ret;
 }
 
-int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
-				struct btrfs_device *device)
+int btrfs_sysfs_add_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
 	int ret = 0;
 	unsigned int nofs_flag;
+	struct btrfs_device *device;
 	struct btrfs_fs_devices *seed_fs_devices;
 
 	nofs_flag = memalloc_nofs_save();
 
-	if (device)
-		return btrfs_sysfs_add_device(device);
-
 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
 		ret = btrfs_sysfs_add_device(device);
 		if (ret)
@@ -1438,7 +1435,7 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
 	struct btrfs_fs_devices *fs_devs = fs_info->fs_devices;
 	struct kobject *fsid_kobj = &fs_devs->fsid_kobj;
 
-	error = btrfs_sysfs_add_devices_dir(fs_devs, NULL);
+	error = btrfs_sysfs_add_fs_devices(fs_devs);
 	if (error)
 		return error;
 
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 4217823e255c..2a3a44aa0709 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -14,8 +14,8 @@ enum btrfs_feature_set {
 
 char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
 const char *btrfs_feature_set_name(enum btrfs_feature_set set);
-int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
-		struct btrfs_device *one_device);
+int btrfs_sysfs_add_device(struct btrfs_device *device);
+int btrfs_sysfs_add_fs_devices(struct btrfs_fs_devices *fs_devices);
 int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
                 struct btrfs_device *one_device);
 int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3f8bd1af29eb..8952f7031f4b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2599,7 +2599,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 				    orig_super_num_devices + 1);
 
 	/* add sysfs device entry */
-	btrfs_sysfs_add_devices_dir(fs_devices, device);
+	btrfs_sysfs_add_device(device);
 
 	/*
 	 * we've got more storage, clear any full flags on the space
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (3 preceding siblings ...)
  2020-08-30 14:40 ` [PATCH 02/11] btrfs: refactor btrfs_sysfs_add_devices_dir Anand Jain
@ 2020-08-30 14:40 ` Anand Jain
  2020-08-31  8:58   ` Nikolay Borisov
  2020-08-30 14:40 ` [PATCH 04/11] btrfs: reada: use sprout device_list_mutex Anand Jain
                   ` (7 subsequent siblings)
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:40 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

Similar to btrfs_sysfs_add_devices_dir() refactor, refactor
btrfs_sysfs_remove_devices_dir() so that we don't have to use the 2nd
argument to indicate whether to free all devices or just one device. So
this patch also adds a bit of cleanups and return value is dropped to
void.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/dev-replace.c |  2 +-
 fs/btrfs/sysfs.c       | 18 +++++-------------
 fs/btrfs/sysfs.h       |  4 ++--
 fs/btrfs/volumes.c     |  8 ++++----
 4 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 979b40754cb4..a7b1ad4e5706 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -743,7 +743,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	/* replace the sysfs entry */
-	btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
+	btrfs_sysfs_remove_device(src_device);
 	btrfs_sysfs_update_devid(tgt_device);
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
 		btrfs_scratch_superblocks(fs_info, src_device->bdev,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index afc2e2ab4d27..69e5b57a33b4 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -962,7 +962,7 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
 	addrm_unknown_feature_attrs(fs_info, false);
 	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
 	sysfs_remove_files(fsid_kobj, btrfs_attrs);
-	btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, NULL);
+	btrfs_sysfs_remove_fs_devices(fs_info->fs_devices);
 }
 
 static const char * const btrfs_feature_set_names[FEAT_MAX] = {
@@ -1149,7 +1149,7 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-static void btrfs_sysfs_remove_device(struct btrfs_device *device)
+void btrfs_sysfs_remove_device(struct btrfs_device *device)
 {
 	struct hd_struct *disk;
 	struct kobject *disk_kobj;
@@ -1174,17 +1174,11 @@ static void btrfs_sysfs_remove_device(struct btrfs_device *device)
 	wait_for_completion(&device->kobj_unregister);
 }
 
-/* when 2nd argument device is NULL, it removes all devices link */
-int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-				   struct btrfs_device *device)
+void btrfs_sysfs_remove_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
+	struct btrfs_device *device;
 	struct btrfs_fs_devices *seed_fs_devices;
 
-	if (device) {
-		btrfs_sysfs_remove_device(device);
-		return 0;
-	}
-
 	list_for_each_entry(device, &fs_devices->devices, dev_list)
 		btrfs_sysfs_remove_device(device);
 
@@ -1192,8 +1186,6 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
 		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
 			btrfs_sysfs_remove_device(device);
 	}
-
-	return 0;
 }
 
 static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
@@ -1441,7 +1433,7 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
 
 	error = sysfs_create_files(fsid_kobj, btrfs_attrs);
 	if (error) {
-		btrfs_sysfs_remove_devices_dir(fs_devs, NULL);
+		btrfs_sysfs_remove_fs_devices(fs_devs);
 		return error;
 	}
 
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 2a3a44aa0709..085e34b81fba 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -16,8 +16,8 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
 const char *btrfs_feature_set_name(enum btrfs_feature_set set);
 int btrfs_sysfs_add_device(struct btrfs_device *device);
 int btrfs_sysfs_add_fs_devices(struct btrfs_fs_devices *fs_devices);
-int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
-                struct btrfs_device *one_device);
+void btrfs_sysfs_remove_device(struct btrfs_device *device);
+void btrfs_sysfs_remove_fs_devices(struct btrfs_fs_devices *fs_devices);
 int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8952f7031f4b..9921b43ef839 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2040,7 +2040,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
 }
 
 int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
-		u64 devid)
+		    u64 devid)
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *cur_devices;
@@ -2144,7 +2144,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
 	if (device->bdev) {
 		cur_devices->open_devices--;
 		/* remove sysfs entry */
-		btrfs_sysfs_remove_devices_dir(fs_devices, device);
+		btrfs_sysfs_remove_device(device);
 	}
 
 	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
@@ -2245,7 +2245,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
 
 	mutex_lock(&fs_devices->device_list_mutex);
 
-	btrfs_sysfs_remove_devices_dir(fs_devices, tgtdev);
+	btrfs_sysfs_remove_device(tgtdev);
 
 	if (tgtdev->bdev)
 		fs_devices->open_devices--;
@@ -2680,7 +2680,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	return ret;
 
 error_sysfs:
-	btrfs_sysfs_remove_devices_dir(fs_devices, device);
+	btrfs_sysfs_remove_device(device);
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	mutex_lock(&fs_info->chunk_mutex);
 	list_del_rcu(&device->dev_list);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 04/11] btrfs: reada: use sprout device_list_mutex
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (4 preceding siblings ...)
  2020-08-30 14:40 ` [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir Anand Jain
@ 2020-08-30 14:40 ` Anand Jain
  2020-08-31  8:54   ` Nikolay Borisov
  2020-08-31 16:08   ` Josef Bacik
  2020-08-30 14:41 ` [PATCH 05/11] btrfs: btrfs_init_devices_late: " Anand Jain
                   ` (6 subsequent siblings)
  12 siblings, 2 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:40 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

On an fs mounted using a sprout-device, the seed fs_devices are maintained
in a linked list under fs_info->fs_devices. Each seed's fs_devices also
have device_list_mutex initialized to protect against the potential race
with delete threads. But the delete thread (at btrfs_rm_device()) is holding
the fs_info::fs_devices::device_list_mutex mutex which is sprout's
device_list_mutex instead of seed's device_list_mutex. Moreover, there
aren't any significient benefits in using the seed::device_list_mutex
instead of sprout::device_list_mutex.

So this patch converts them of using the seed::device_list_mutex to
sprout::device_list_mutex.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/reada.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index c0035fc0ec67..9b54a51ba860 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -776,13 +776,11 @@ static int __reada_start_for_fsdevs(struct btrfs_fs_devices *fs_devices)
 
 	do {
 		enqueued = 0;
-		mutex_lock(&fs_devices->device_list_mutex);
 		list_for_each_entry(device, &fs_devices->devices, dev_list) {
 			if (atomic_read(&device->reada_in_flight) <
 			    MAX_IN_FLIGHT)
 				enqueued += reada_start_machine_dev(device);
 		}
-		mutex_unlock(&fs_devices->device_list_mutex);
 		total += enqueued;
 	} while (enqueued && total < 10000);
 
@@ -795,10 +793,13 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 	int i;
 	u64 enqueued = 0;
 
+	mutex_lock(&fs_devices->device_list_mutex);
+
 	enqueued += __reada_start_for_fsdevs(fs_devices);
 	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
 		enqueued += __reada_start_for_fsdevs(seed_devs);
 
+	mutex_unlock(&fs_devices->device_list_mutex);
 	if (enqueued == 0)
 		return;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 05/11] btrfs: btrfs_init_devices_late: use sprout device_list_mutex
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (5 preceding siblings ...)
  2020-08-30 14:40 ` [PATCH 04/11] btrfs: reada: use sprout device_list_mutex Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:37   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev Anand Jain
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

In a mounted sprout FS, all threads now are using the
sprout::device_list_mutex, and this is the only piece of code using
the seed::device_list_mutex. This patch converts to use the sprouts
fs_info->fs_devices->device_list_mutex.

The same reasoning holds true here, that device delete is holding
the sprout::device_list_mutex.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/volumes.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9921b43ef839..7639a048c6cf 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7184,16 +7184,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
 	mutex_lock(&fs_devices->device_list_mutex);
 	list_for_each_entry(device, &fs_devices->devices, dev_list)
 		device->fs_info = fs_info;
-	mutex_unlock(&fs_devices->device_list_mutex);
 
 	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
-		mutex_lock(&seed_devs->device_list_mutex);
 		list_for_each_entry(device, &seed_devs->devices, dev_list)
 			device->fs_info = fs_info;
-		mutex_unlock(&seed_devs->device_list_mutex);
 
 		seed_devs->fs_info = fs_info;
 	}
+	mutex_unlock(&fs_devices->device_list_mutex);
 }
 
 static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (6 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 05/11] btrfs: btrfs_init_devices_late: " Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:38   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk Anand Jain
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

In the function btrfs_init_dev_replace_tgtdev(), the local
variable struct list_head *devices is used only once, instead
just open code the same.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/dev-replace.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a7b1ad4e5706..aea1c782c009 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -224,7 +224,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_device *device;
 	struct block_device *bdev;
-	struct list_head *devices;
 	struct rcu_string *name;
 	u64 devid = BTRFS_DEV_REPLACE_DEVID;
 	int ret = 0;
@@ -244,8 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 
 	sync_blockdev(bdev);
 
-	devices = &fs_info->fs_devices->devices;
-	list_for_each_entry(device, devices, dev_list) {
+	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
 		if (device->bdev == bdev) {
 			btrfs_err(fs_info,
 				  "target device is in the filesystem!");
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (7 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:43   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device() Anand Jain
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

In the function btrfs_remove_chunk() remove the local variable
%fs_devices, instead use the assigned pointer directly.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/volumes.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7639a048c6cf..3e7a7d94a211 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2922,7 +2922,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
 	struct map_lookup *map;
 	u64 dev_extent_len = 0;
 	int i, ret = 0;
-	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 
 	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
 	if (IS_ERR(em)) {
@@ -2944,14 +2943,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
 	 * a device replace operation that replaces the device object associated
 	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
 	 */
-	mutex_lock(&fs_devices->device_list_mutex);
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	for (i = 0; i < map->num_stripes; i++) {
 		struct btrfs_device *device = map->stripes[i].dev;
 		ret = btrfs_free_dev_extent(trans, device,
 					    map->stripes[i].physical,
 					    &dev_extent_len);
 		if (ret) {
-			mutex_unlock(&fs_devices->device_list_mutex);
+			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 			btrfs_abort_transaction(trans, ret);
 			goto out;
 		}
@@ -2967,12 +2966,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
 
 		ret = btrfs_update_device(trans, device);
 		if (ret) {
-			mutex_unlock(&fs_devices->device_list_mutex);
+			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 			btrfs_abort_transaction(trans, ret);
 			goto out;
 		}
 	}
-	mutex_unlock(&fs_devices->device_list_mutex);
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	ret = btrfs_free_chunk(trans, chunk_offset);
 	if (ret) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device()
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (8 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:44   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device Anand Jain
                   ` (2 subsequent siblings)
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

Cleanup btrfs_assign_next_active_device(), drop %this_dev.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/volumes.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3e7a7d94a211..c8b0d9eb4468 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1961,16 +1961,13 @@ static struct btrfs_device * btrfs_find_next_active_device(
  * this_dev) which is active.
  */
 void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
-				     struct btrfs_device *this_dev)
+					    struct btrfs_device *next_device)
 {
 	struct btrfs_fs_info *fs_info = device->fs_info;
-	struct btrfs_device *next_device;
 
-	if (this_dev)
-		next_device = this_dev;
-	else
+	if (!next_device)
 		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
-								device);
+							    device);
 	ASSERT(next_device);
 
 	if (fs_info->sb->s_bdev &&
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (9 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device() Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:44   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare Anand Jain
  2020-08-30 14:41 ` [PATCH 11/11] btrfs: fix replace of seed device Anand Jain
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

open_seed_devices() does goto to just return. So drop goto and just return
instead.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/volumes.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c8b0d9eb4468..dc81646b13c0 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6741,19 +6741,17 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
 	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
 	if (ret) {
 		free_fs_devices(fs_devices);
-		fs_devices = ERR_PTR(ret);
-		goto out;
+		return ERR_PTR(ret);
 	}
 
 	if (!fs_devices->seeding) {
 		close_fs_devices(fs_devices);
 		free_fs_devices(fs_devices);
-		fs_devices = ERR_PTR(-EINVAL);
-		goto out;
+		return ERR_PTR(-EINVAL);
 	}
 
 	list_add_tail(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);
-out:
+
 	return fs_devices;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (10 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  2020-08-31  8:46   ` Nikolay Borisov
  2020-08-30 14:41 ` [PATCH 11/11] btrfs: fix replace of seed device Anand Jain
  12 siblings, 1 reply; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

There isn't any convoluted child functions inside the
btrfs_dev_replace_update_device_in_mapping_tree() function. With the
function moved before where it is called, we can drop its file local
declare.

No functional changes.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/dev-replace.c | 56 ++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 30 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index aea1c782c009..656d8ba642af 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -64,10 +64,6 @@
 
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				       int scrub_ret);
-static void btrfs_dev_replace_update_device_in_mapping_tree(
-						struct btrfs_fs_info *fs_info,
-						struct btrfs_device *srcdev,
-						struct btrfs_device *tgtdev);
 static int btrfs_dev_replace_kthread(void *data);
 
 int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
@@ -597,6 +593,32 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
 	wake_up(&fs_info->dev_replace.replace_wait);
 }
 
+static void btrfs_dev_replace_update_device_in_mapping_tree(
+						struct btrfs_fs_info *fs_info,
+						struct btrfs_device *srcdev,
+						struct btrfs_device *tgtdev)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 start = 0;
+	int i;
+
+	write_lock(&em_tree->lock);
+	do {
+		em = lookup_extent_mapping(em_tree, start, (u64)-1);
+		if (!em)
+			break;
+		map = em->map_lookup;
+		for (i = 0; i < map->num_stripes; i++)
+			if (srcdev == map->stripes[i].dev)
+				map->stripes[i].dev = tgtdev;
+		start = em->start + em->len;
+		free_extent_map(em);
+	} while (start);
+	write_unlock(&em_tree->lock);
+}
+
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				       int scrub_ret)
 {
@@ -759,32 +781,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
-static void btrfs_dev_replace_update_device_in_mapping_tree(
-						struct btrfs_fs_info *fs_info,
-						struct btrfs_device *srcdev,
-						struct btrfs_device *tgtdev)
-{
-	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
-	struct extent_map *em;
-	struct map_lookup *map;
-	u64 start = 0;
-	int i;
-
-	write_lock(&em_tree->lock);
-	do {
-		em = lookup_extent_mapping(em_tree, start, (u64)-1);
-		if (!em)
-			break;
-		map = em->map_lookup;
-		for (i = 0; i < map->num_stripes; i++)
-			if (srcdev == map->stripes[i].dev)
-				map->stripes[i].dev = tgtdev;
-		start = em->start + em->len;
-		free_extent_map(em);
-	} while (start);
-	write_unlock(&em_tree->lock);
-}
-
 /*
  * Read progress of device replace status according to the state and last
  * stored position. The value format is the same as for
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 11/11] btrfs: fix replace of seed device
  2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
                   ` (11 preceding siblings ...)
  2020-08-30 14:41 ` [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare Anand Jain
@ 2020-08-30 14:41 ` Anand Jain
  12 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

If you replace a seed device in a sprouted fs, it appears to have
successfully replaced the seed device, but if you look closely, it didn't.

Here is an example.

mkfs.btrfs -fq /dev/sda
btrfstune -S1 /dev/sda
mount /dev/sda /btrfs
btrfs dev add /dev/sdb /btrfs
umount /btrfs; btrfs dev scan --forget
mount -o device=/dev/sda /dev/sdb /btrfs
btrfs rep start -f /dev/sda /dev/sdc /btrfs; echo $?
0

  BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc started
  BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc finished

btrfs fi show
Label: none  uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f
	Total devices 2 FS bytes used 256.00KiB
	devid    1 size 3.00GiB used 520.00MiB path /dev/sdc
	devid    2 size 3.00GiB used 896.00MiB path /dev/sdb

Label: none  uuid: 10bd3202-0415-43af-96a8-d5409f310a7e
	Total devices 1 FS bytes used 128.00KiB
	devid    1 size 3.00GiB used 536.00MiB path /dev/sda

So as per the replace start command and kernel log replace was successful.

Now let's try to clean mount.

umount /btrfs;  btrfs dev scan --forget

mount -o device=/dev/sdc /dev/sdb /btrfs
mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error.

[  636.157517] BTRFS error (device sdc): failed to read chunk tree: -2
[  636.180177] BTRFS error (device sdc): open_ctree failed

That's because per dev items it is still looking for the original seed
device.

btrfs in dump-tree -d /dev/sdb

	item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98
		devid 1 total_bytes 3221225472 bytes_used 545259520
		io_align 4096 io_width 4096 sector_size 4096 type 0
		generation 6 start_offset 0 dev_group 0
		seek_speed 0 bandwidth 0
		uuid 59368f50-9af2-4b17-91da-8a783cc418d4  <--- seed uuid
		fsid 10bd3202-0415-43af-96a8-d5409f310a7e  <--- seed fsid
	item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98
		devid 2 total_bytes 3221225472 bytes_used 939524096
		io_align 4096 io_width 4096 sector_size 4096 type 0
		generation 0 start_offset 0 dev_group 0
		seek_speed 0 bandwidth 0
		uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a  <- sprout uuid
		fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid

But the replaced target has the following uuid+fsid in its superblock
which doesn't match with the expected uuid+fsid in its devitem.

btrfs in dump-super /dev/sdc | egrep '^generation|dev_item.uuid|dev_item.fsid|devid'
generation	20
dev_item.uuid	59368f50-9af2-4b17-91da-8a783cc418d4
dev_item.fsid	ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match]
dev_item.devid	1

So if you provide the original seed device the mount shall be successful.
Which so long happening in the test case btrfs/163.

btrfs dev scan --forget
mount -o device=/dev/sda /dev/sdb /btrfs

Fix in this patch:
Make it as you can't replace a seed device, you can only add a new device
and then delete the seed device. If replace is attempted then returns -EINVAL.
As in the below changes.

Another possible fix:
If we want to keep the ability to replace for seed-device, then we could
update the fsid of the replace-target blocks. And after replacement, you
have seed device but with sprout fsid. But then I don't know what is the
point and if there is any such use case.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
 fs/btrfs/dev-replace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 656d8ba642af..02b7b3edf9a3 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -225,7 +225,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 
 	*device_out = NULL;
-	if (fs_info->fs_devices->seeding) {
+	if (srcdev->fs_devices->seeding) {
 		btrfs_err(fs_info, "the filesystem is a seed filesystem!");
 		return -EINVAL;
 	}
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH] fstests: btrfs/163: replace sprout instead of seed
  2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
  2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
@ 2020-08-30 14:41   ` Anand Jain
  1 sibling, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-30 14:41 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

As of now, this test case replaced the seed device in a sprouted seed
filesystem. However, the kernel doesn't support the replacement of the seed
device yet. And the following kernel patch shall enforce the [1]
unsupportability and returns -EINVAL (error code is the same as in seed
device in a non sprouted filesystem replacement).

[1]
   btrfs: fix replace of seed device

So in this test case instead of seed as replacing target use the sprout
device. As we didn't have any test case which shall test the replacement of
sprout device. So now this case fills the gap.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
I will send this to fstests ML once kernel patch is integrated.

 tests/btrfs/163     | 21 ++++++++++++++++-----
 tests/btrfs/163.out |  5 ++++-
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/tests/btrfs/163 b/tests/btrfs/163
index 24c725afb6b9..354d88502d47 100755
--- a/tests/btrfs/163
+++ b/tests/btrfs/163
@@ -4,11 +4,15 @@
 #
 # FS QA Test 163
 #
-# Test case to verify that a seed device can be replaced
+# Test case to verify that a sprouted device can be replaced
 #  Create a seed device
 #  Create a sprout device
 #  Remount RW
-#  Run device replace on the seed device
+#  Run device replace on the sprout device
+#
+# Depends on the kernel patch
+#   btrfs: fail replace of seed device
+
 seq=`basename $0`
 seqres=$RESULT_DIR/$seq
 echo "QA output created by $seq"
@@ -39,6 +43,7 @@ _supported_fs btrfs
 _supported_os Linux
 _require_command "$BTRFS_TUNE_PROG" btrfstune
 _require_scratch_dev_pool 3
+_require_btrfs_forget_or_module_loadable
 
 _scratch_dev_pool_get 3
 
@@ -52,7 +57,7 @@ create_seed()
 	run_check _mount $dev_seed $SCRATCH_MNT
 	$XFS_IO_PROG -f -d -c "pwrite -S 0xab 0 4M" $SCRATCH_MNT/foobar >\
 		/dev/null
-	echo -- gloden --
+	echo -- golden --
 	od -x $SCRATCH_MNT/foobar
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
@@ -64,22 +69,28 @@ add_sprout()
 {
 	_run_btrfs_util_prog device add -f $dev_sprout $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
+	_mount -o remount,rw $dev_sprout $SCRATCH_MNT
+	$XFS_IO_PROG -f -d -c "pwrite -S 0xcd 0 4M" $SCRATCH_MNT/foobar2 >\
+		/dev/null
 }
 
 replace_seed()
 {
-	_run_btrfs_util_prog replace start -fB $dev_seed $dev_replace_tgt $SCRATCH_MNT
+	_run_btrfs_util_prog replace start -fB $dev_sprout $dev_replace_tgt $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
-	run_check _mount $dev_replace_tgt $SCRATCH_MNT
+	_btrfs_forget_or_module_reload
+	run_check _mount -o device=$dev_seed $dev_replace_tgt $SCRATCH_MNT
 	echo -- sprout --
 	od -x $SCRATCH_MNT/foobar
+	od -x $SCRATCH_MNT/foobar2
 	_scratch_unmount
 
 }
 
 seed_is_mountable()
 {
+	_btrfs_forget_or_module_reload
 	run_check _mount $dev_seed $SCRATCH_MNT
 	_run_btrfs_util_prog filesystem show -m $SCRATCH_MNT
 	_scratch_unmount
diff --git a/tests/btrfs/163.out b/tests/btrfs/163.out
index 91f6f5b6f48a..351ef7b040b2 100644
--- a/tests/btrfs/163.out
+++ b/tests/btrfs/163.out
@@ -1,5 +1,5 @@
 QA output created by 163
--- gloden --
+-- golden --
 0000000 abab abab abab abab abab abab abab abab
 *
 20000000
@@ -7,3 +7,6 @@ QA output created by 163
 0000000 abab abab abab abab abab abab abab abab
 *
 20000000
+0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd
+*
+20000000
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups
@ 2020-08-31  1:38 Anand Jain
  2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
                   ` (12 more replies)
  0 siblings, 13 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-31  1:38 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba, josef

The cover letter for the patchset below [1] doesn't seem to have reached the ML.
So here I am sending it again.

[1]
 https://patchwork.kernel.org/project/linux-btrfs/list/?series=340699

In this patch set:
Rebased on latest misc-next which contains the seed_list from Nicolas.
This patchset has been sent before as two sets, here it is been merged.

Patches 1/11 fix the null kobject being put, which is observed with kernel
compiled with memory poisoning.
Patches 2/11 and 3/11 are cleanups to maintain better flow in the
functions btrfs_sysfs_add_devices_dir() and btrfs_sysfs_remove_devices_dir().
Patches 4/11 and 5/11 drops the last two users of seed device_list_mutex,
so now they hold main filesystem device_list_mutex. 
Patches [6-10]/11 are cleanups.
Patches 11/11 block the replacement of seed device in a sprouted FS, we already
block replacing a seed device on a non-sprouted device.

This set has passed through fstests/volume.

---- original cover-letter ---------
In a sprouted file system, the seed's fs_devices are cloned and 
listed under the sprout's fs_devices. The fs_info::fs_devices
points to the sprout::fs_devices and various critical operations
like device-delete holds the top-level main device_list_mutex
which is sprout::fs_devices::device_list_mutex and _not_ the seed level
mutex such as sprout::fs_devices::seed::fs_devices::device_list_mutex.

Also all related readers (except for two threads- reada and init_devices_late)
hold the sprout::fs_devices::device_list_mutex too. And those two threads
which are missing to hold the correct lock are being fixed here.

I take the approach to fix the read end instead of fixing the writer end
which are not holding the seed level mutex because to keep things
simple and there isn't much benefit burning extra CPU cycles in going
through the lock/unlock process as we traverse through the
fs_devices::seed fs_devices (for example as in reada and init_devices_late
threads).

The first two patches (1/7, 2/7) fixes the threads using the
seed::device_list_mutex.

And rest of the patches ([3-7]/7) are cleanups and these patches
are independent by themself.

These patchset has been tested with full xfstests btrfs test cases and
found to have no new regressions.


Anand Jain (11):
  btrfs: initialize sysfs devid and device link for seed device
  btrfs: refactor btrfs_sysfs_add_devices_dir
  btrfs: refactor btrfs_sysfs_remove_devices_dir
  btrfs: reada: use sprout device_list_mutex
  btrfs: btrfs_init_devices_late: use sprout device_list_mutex
  btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev
  btrfs: cleanup btrfs_remove_chunk
  btrfs: cleanup btrfs_assign_next_active_device()
  btrfs: cleanup unnecessary goto in open_seed_device
  btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file
    global declare
  btrfs: fix replace of seed device

 fs/btrfs/dev-replace.c |  66 ++++++++----------
 fs/btrfs/reada.c       |   5 +-
 fs/btrfs/sysfs.c       | 151 ++++++++++++++++++++++++-----------------
 fs/btrfs/sysfs.h       |   8 +--
 fs/btrfs/volumes.c     |  40 +++++------
 5 files changed, 143 insertions(+), 127 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 05/11] btrfs: btrfs_init_devices_late: use sprout device_list_mutex
  2020-08-30 14:41 ` [PATCH 05/11] btrfs: btrfs_init_devices_late: " Anand Jain
@ 2020-08-31  8:37   ` Nikolay Borisov
  2020-09-01  8:54     ` Anand Jain
  0 siblings, 1 reply; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:37 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> In a mounted sprout FS, all threads now are using the
> sprout::device_list_mutex, and this is the only piece of code using
> the seed::device_list_mutex. This patch converts to use the sprouts
> fs_info->fs_devices->device_list_mutex.
> 
> The same reasoning holds true here, that device delete is holding
> the sprout::device_list_mutex.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
>  fs/btrfs/volumes.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 9921b43ef839..7639a048c6cf 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -7184,16 +7184,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
>  	mutex_lock(&fs_devices->device_list_mutex);
>  	list_for_each_entry(device, &fs_devices->devices, dev_list)
>  		device->fs_info = fs_info;
> -	mutex_unlock(&fs_devices->device_list_mutex);
>  
>  	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
> -		mutex_lock(&seed_devs->device_list_mutex);
>  		list_for_each_entry(device, &seed_devs->devices, dev_list)
>  			device->fs_info = fs_info;
> -		mutex_unlock(&seed_devs->device_list_mutex);
>  
>  		seed_devs->fs_info = fs_info;
>  	}
> +	mutex_unlock(&fs_devices->device_list_mutex);

Instead of doing the looping here to set fs_info I think it makes more
sense to move the initialization of fs_info for the seed fs_info/seed
devices to open_seed_devices.

As a matter of fact at the point where btrfs_init_devices_late is called
btrfs_read_chunk_tree would have already been called which would have
resulted in all present seed devices be added to fs_info::seed_list. So
acquiring the lock here serves no purpose really.

>  }
>  
>  static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev
  2020-08-30 14:41 ` [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev Anand Jain
@ 2020-08-31  8:38   ` Nikolay Borisov
  0 siblings, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:38 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> In the function btrfs_init_dev_replace_tgtdev(), the local
> variable struct list_head *devices is used only once, instead
> just open code the same.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

Fair enough,
Reviewed-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk
  2020-08-30 14:41 ` [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk Anand Jain
@ 2020-08-31  8:43   ` Nikolay Borisov
  0 siblings, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:43 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> In the function btrfs_remove_chunk() remove the local variable
> %fs_devices, instead use the assigned pointer directly.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

So this is minor enough and correct, however, have measured whether
having multiple fs_info::fs_devices:device_list_Mutex derefs results in
slightly (perhaps negligible) increase in code size.

In any case the code is correct, so:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>


> ---
>  fs/btrfs/volumes.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 7639a048c6cf..3e7a7d94a211 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -2922,7 +2922,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
>  	struct map_lookup *map;
>  	u64 dev_extent_len = 0;
>  	int i, ret = 0;
> -	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
>  
>  	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
>  	if (IS_ERR(em)) {
> @@ -2944,14 +2943,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
>  	 * a device replace operation that replaces the device object associated
>  	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
>  	 */
> -	mutex_lock(&fs_devices->device_list_mutex);
> +	mutex_lock(&fs_info->fs_devices->device_list_mutex);
>  	for (i = 0; i < map->num_stripes; i++) {
>  		struct btrfs_device *device = map->stripes[i].dev;
>  		ret = btrfs_free_dev_extent(trans, device,
>  					    map->stripes[i].physical,
>  					    &dev_extent_len);
>  		if (ret) {
> -			mutex_unlock(&fs_devices->device_list_mutex);
> +			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>  			btrfs_abort_transaction(trans, ret);
>  			goto out;
>  		}
> @@ -2967,12 +2966,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
>  
>  		ret = btrfs_update_device(trans, device);
>  		if (ret) {
> -			mutex_unlock(&fs_devices->device_list_mutex);
> +			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>  			btrfs_abort_transaction(trans, ret);
>  			goto out;
>  		}
>  	}
> -	mutex_unlock(&fs_devices->device_list_mutex);
> +	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>  
>  	ret = btrfs_free_chunk(trans, chunk_offset);
>  	if (ret) {
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device()
  2020-08-30 14:41 ` [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device() Anand Jain
@ 2020-08-31  8:44   ` Nikolay Borisov
  0 siblings, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:44 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> Cleanup btrfs_assign_next_active_device(), drop %this_dev.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

Reviewed-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device
  2020-08-30 14:41 ` [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device Anand Jain
@ 2020-08-31  8:44   ` Nikolay Borisov
  0 siblings, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:44 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> open_seed_devices() does goto to just return. So drop goto and just return
> instead.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

Reviewed-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare
  2020-08-30 14:41 ` [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare Anand Jain
@ 2020-08-31  8:46   ` Nikolay Borisov
  0 siblings, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:46 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:41 ч., Anand Jain wrote:
> There isn't any convoluted child functions inside the
> btrfs_dev_replace_update_device_in_mapping_tree() function. With the
> function moved before where it is called, we can drop its file local
> declare.
> 
> No functional changes.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

I personally am fan of such refactoring but I remember David didn't like
them so much on their own, still:

Reviewed-by: Nikolay Borisov <nborisov@suse.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] btrfs: reada: use sprout device_list_mutex
  2020-08-30 14:40 ` [PATCH 04/11] btrfs: reada: use sprout device_list_mutex Anand Jain
@ 2020-08-31  8:54   ` Nikolay Borisov
  2020-08-31 16:08   ` Josef Bacik
  1 sibling, 0 replies; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:54 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:40 ч., Anand Jain wrote:
> On an fs mounted using a sprout-device, the seed fs_devices are maintained
> in a linked list under fs_info->fs_devices. Each seed's fs_devices also
> have device_list_mutex initialized to protect against the potential race
> with delete threads. But the delete thread (at btrfs_rm_device()) is holding
> the fs_info::fs_devices::device_list_mutex mutex which is sprout's
> device_list_mutex instead of seed's device_list_mutex. Moreover, there
> aren't any significient benefits in using the seed::device_list_mutex
> instead of sprout::device_list_mutex.
> 
> So this patch converts them of using the seed::device_list_mutex to
> sprout::device_list_mutex.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

nit: I'm beginning to think that the structure representing seed devices
shouldn't be a full-fledged btrfs_fs_devices but a slimmed down version
which contains only the necessary bits.


Reviewed-by: Nikolay Borisov <nborisov@suse.com>

> ---
>  fs/btrfs/reada.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
> index c0035fc0ec67..9b54a51ba860 100644
> --- a/fs/btrfs/reada.c
> +++ b/fs/btrfs/reada.c
> @@ -776,13 +776,11 @@ static int __reada_start_for_fsdevs(struct btrfs_fs_devices *fs_devices)
>  
>  	do {
>  		enqueued = 0;
> -		mutex_lock(&fs_devices->device_list_mutex);
>  		list_for_each_entry(device, &fs_devices->devices, dev_list) {
>  			if (atomic_read(&device->reada_in_flight) <
>  			    MAX_IN_FLIGHT)
>  				enqueued += reada_start_machine_dev(device);
>  		}
> -		mutex_unlock(&fs_devices->device_list_mutex);
>  		total += enqueued;
>  	} while (enqueued && total < 10000);
>  
> @@ -795,10 +793,13 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
>  	int i;
>  	u64 enqueued = 0;
>  
> +	mutex_lock(&fs_devices->device_list_mutex);
> +
>  	enqueued += __reada_start_for_fsdevs(fs_devices);
>  	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
>  		enqueued += __reada_start_for_fsdevs(seed_devs);
>  
> +	mutex_unlock(&fs_devices->device_list_mutex);
>  	if (enqueued == 0)
>  		return;
>  
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir
  2020-08-30 14:40 ` [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir Anand Jain
@ 2020-08-31  8:58   ` Nikolay Borisov
  2020-08-31  9:12     ` Anand Jain
  0 siblings, 1 reply; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  8:58 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:40 ч., Anand Jain wrote:
> Similar to btrfs_sysfs_add_devices_dir() refactor, refactor
> btrfs_sysfs_remove_devices_dir() so that we don't have to use the 2nd
> argument to indicate whether to free all devices or just one device. So
> this patch also adds a bit of cleanups and return value is dropped to
> void.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

<snip>

> -/* when 2nd argument device is NULL, it removes all devices link */
> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -				   struct btrfs_device *device)
> +void btrfs_sysfs_remove_fs_devices(struct btrfs_fs_devices *fs_devices)
>  {
> +	struct btrfs_device *device;
>  	struct btrfs_fs_devices *seed_fs_devices;
>  
> -	if (device) {
> -		btrfs_sysfs_remove_device(device);
> -		return 0;
> -	}

What branch is this based off of ? Because I don't see
btrfs_sysfs_remove_device function at all ?

<snip>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device
  2020-08-30 14:40 ` [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device Anand Jain
@ 2020-08-31  9:07   ` Nikolay Borisov
  2020-08-31 12:00     ` Anand Jain
  2020-08-31 16:21   ` Josef Bacik
  1 sibling, 1 reply; 38+ messages in thread
From: Nikolay Borisov @ 2020-08-31  9:07 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba, josef



On 30.08.20 г. 17:40 ч., Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>  mount seed /mnt
>  add sprout to /mnt
>  umount /mnt
>  mount sprout to /mnt
>  delete seed
> 
>  kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>  WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>  RIP: 0010:kobject_put+0x80/0x350
>  ::
>  Call Trace:
>  btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>  btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>  btrfs_ioctl+0x206c/0x22a0 [btrfs]
>  ksys_ioctl+0xe2/0x140
>  __x64_sys_ioctl+0x1e/0x29
>  do_syscall_64+0x96/0x150
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>  RIP: 0033:0x7f4047c6288b
>  ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>


This patch is doing too many things at once. Split it so that:
1. You first add the new functions add_device/remove_device in 2
separate patches.
2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to
ousing the newly added helpers, again in 2 separate patches

> ---
>  fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
>  1 file changed, 93 insertions(+), 53 deletions(-)
> 
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 190e59152be5..9b5e58091fae 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>  	return 0;
>  }
>  
> -/* when one_device is NULL, it removes all device links */
> -
> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> -		struct btrfs_device *one_device)
> +static void btrfs_sysfs_remove_device(struct btrfs_device *device)
>  {
>  	struct hd_struct *disk;
>  	struct kobject *disk_kobj;
> +	struct kobject *devices_kobj;
>  
> -	if (!fs_devices->devices_kobj)
> -		return -EINVAL;
> +	/*
> +	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
> +	 * fs_info::fs_devices.
> +	 */
> +	devices_kobj = device->fs_info->fs_devices->devices_kobj;
> +	ASSERT(devices_kobj);
>  
> -	if (one_device) {
> -		if (one_device->bdev) {
> -			disk = one_device->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> -		}
> +	if (device->bdev) {
> +		disk = device->bdev->bd_part;
> +		disk_kobj = &part_to_dev(disk)->kobj;
> +		sysfs_remove_link(devices_kobj, disk_kobj->name);
> +	}
>  
> -		kobject_del(&one_device->devid_kobj);
> -		kobject_put(&one_device->devid_kobj);
> +	kobject_del(&device->devid_kobj);
> +	kobject_put(&device->devid_kobj);
>  
> -		wait_for_completion(&one_device->kobj_unregister);
> +	wait_for_completion(&device->kobj_unregister);
> +}
>  
> +/* when 2nd argument device is NULL, it removes all devices link */
> +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
> +				   struct btrfs_device *device)
> +{
> +	struct btrfs_fs_devices *seed_fs_devices;
> +
> +	if (device) {
> +		btrfs_sysfs_remove_device(device);
>  		return 0;
>  	}
>  
> -	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
> -
> -		if (one_device->bdev) {
> -			disk = one_device->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -			sysfs_remove_link(fs_devices->devices_kobj,
> -					  disk_kobj->name);
> -		}
> -		kobject_del(&one_device->devid_kobj);
> -		kobject_put(&one_device->devid_kobj);
> +	list_for_each_entry(device, &fs_devices->devices, dev_list)
> +		btrfs_sysfs_remove_device(device);
>  
> -		wait_for_completion(&one_device->kobj_unregister);
> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
> +			btrfs_sysfs_remove_device(device);
>  	}
>  
>  	return 0;
> @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = {
>  	.release	= btrfs_release_devid_kobj,
>  };
>  
> +static int btrfs_sysfs_add_device(struct btrfs_device *device)
> +{
> +	int ret;
> +	struct kobject *devices_kobj;
> +        struct kobject *devinfo_kobj;
> +
> +	/*
> +	 * make sure we use the fs_info::fs_devices to fetch the kobjects
> +	 * even for the seed fs_devices
> +	 */
> +	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
> +	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
> +	ASSERT(devices_kobj);
> +	ASSERT(devinfo_kobj);
> +
> +	if (device->bdev) {
> +		struct hd_struct *disk;
> +		struct kobject *disk_kobj;
> +
> +		disk = device->bdev->bd_part;
> +		disk_kobj = &part_to_dev(disk)->kobj;
> +
> +		ret = sysfs_create_link(devices_kobj, disk_kobj,
> +					disk_kobj->name);
> +		if (ret) {
> +			btrfs_warn(device->fs_info,
> +			   "sysfs create device link failed %d devid %llu",
> +				   ret, device->devid);
> +			return ret;
> +		}
> +	}
> +
> +	init_completion(&device->kobj_unregister);
> +	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
> +				   devinfo_kobj, "%llu", device->devid);
> +	if (ret) {
> +		kobject_put(&device->devid_kobj);
> +		btrfs_warn(device->fs_info,
> +			   "sysfs devinfo init failed %d devid %llu",
> +			   ret, device->devid);
> +	}
> +
> +	return ret;
> +}
> +
>  int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
> -				struct btrfs_device *one_device)
> +				struct btrfs_device *device)
>  {
> -	int error = 0;
> -	struct btrfs_device *dev;
> +	int ret = 0;
>  	unsigned int nofs_flag;
> +	struct btrfs_fs_devices *seed_fs_devices;
>  
>  	nofs_flag = memalloc_nofs_save();
> -	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>  
> -		if (one_device && one_device != dev)
> -			continue;
> +	if (device)
> +		return btrfs_sysfs_add_device(device);
>  
> -		if (dev->bdev) {
> -			struct hd_struct *disk;
> -			struct kobject *disk_kobj;
> -
> -			disk = dev->bdev->bd_part;
> -			disk_kobj = &part_to_dev(disk)->kobj;
> -
> -			error = sysfs_create_link(fs_devices->devices_kobj,
> -						  disk_kobj, disk_kobj->name);
> -			if (error)
> -				break;
> -		}
> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +		ret = btrfs_sysfs_add_device(device);
> +		if (ret)
> +			goto out;
> +	}
>  
> -		init_completion(&dev->kobj_unregister);
> -		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
> -					     fs_devices->devinfo_kobj, "%llu",
> -					     dev->devid);
> -		if (error) {
> -			kobject_put(&dev->devid_kobj);
> -			break;
> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
> +			ret = btrfs_sysfs_add_device(device);
> +			if (ret)
> +				goto out;
>  		}
>  	}
> +
> +out:
>  	memalloc_nofs_restore(nofs_flag);
>  
> -	return error;
> +	return ret;
>  }
>  
>  void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir
  2020-08-31  8:58   ` Nikolay Borisov
@ 2020-08-31  9:12     ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-31  9:12 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs; +Cc: dsterba, josef



On 31/8/20 4:58 pm, Nikolay Borisov wrote:
> 
> 
> On 30.08.20 г. 17:40 ч., Anand Jain wrote:
>> Similar to btrfs_sysfs_add_devices_dir() refactor, refactor
>> btrfs_sysfs_remove_devices_dir() so that we don't have to use the 2nd
>> argument to indicate whether to free all devices or just one device. So
>> this patch also adds a bit of cleanups and return value is dropped to
>> void.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> 
> <snip>
> 
>> -/* when 2nd argument device is NULL, it removes all devices link */
>> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>> -				   struct btrfs_device *device)
>> +void btrfs_sysfs_remove_fs_devices(struct btrfs_fs_devices *fs_devices)
>>   {
>> +	struct btrfs_device *device;
>>   	struct btrfs_fs_devices *seed_fs_devices;
>>   
>> -	if (device) {
>> -		btrfs_sysfs_remove_device(device);
>> -		return 0;
>> -	}
> 
> What branch is this based off of ? Because I don't see
> btrfs_sysfs_remove_device function at all ?

  btrfs_sysfs_remove_device() is added in the patch 1/11 in this set.
  Anyways this is based on misc-next last commit 381ebd340264.


> 
> <snip>
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device
  2020-08-31  9:07   ` Nikolay Borisov
@ 2020-08-31 12:00     ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-08-31 12:00 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs; +Cc: dsterba, josef



On 31/8/20 5:07 pm, Nikolay Borisov wrote:
> 
> 
> On 30.08.20 г. 17:40 ч., Anand Jain wrote:
>> The following test case leads to null kobject-being-freed error.
>>
>>   mount seed /mnt
>>   add sprout to /mnt
>>   umount /mnt
>>   mount sprout to /mnt
>>   delete seed
>>
>>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>>   RIP: 0010:kobject_put+0x80/0x350
>>   ::
>>   Call Trace:
>>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>>   ksys_ioctl+0xe2/0x140
>>   __x64_sys_ioctl+0x1e/0x29
>>   do_syscall_64+0x96/0x150
>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>   RIP: 0033:0x7f4047c6288b
>>   ::
>>
>> This is because, at the end of the seed device-delete, we try to remove
>> the seed's devid sysfs entry. But for the seed devices under the sprout
>> fs, we don't initialize the devid kobject yet. So this patch initializes
>> the seed device devid kobject and the device link in the sysfs. This takes
>> care of the Warning.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> 



> This patch is doing too many things at once. Split it so that:
> 1. You first add the new functions add_device/remove_device in 2
> separate patches.
> 2. Switch btrfs_sysfs_add_devices_dir/btrfs_sysfs_remove_devices_dir to
> ousing the newly added helpers, again in 2 separate patches

yeah and

  3. Initialize seed devices devid kobject to fix the bug.

fixing.

-Anand

> 
>> ---
>>   fs/btrfs/sysfs.c | 146 ++++++++++++++++++++++++++++++-----------------
>>   1 file changed, 93 insertions(+), 53 deletions(-)
>>
>> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
>> index 190e59152be5..9b5e58091fae 100644
>> --- a/fs/btrfs/sysfs.c
>> +++ b/fs/btrfs/sysfs.c
>> @@ -1149,45 +1149,48 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
>>   	return 0;
>>   }
>>   
>> -/* when one_device is NULL, it removes all device links */
>> -
>> -int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>> -		struct btrfs_device *one_device)
>> +static void btrfs_sysfs_remove_device(struct btrfs_device *device)
>>   {
>>   	struct hd_struct *disk;
>>   	struct kobject *disk_kobj;
>> +	struct kobject *devices_kobj;
>>   
>> -	if (!fs_devices->devices_kobj)
>> -		return -EINVAL;
>> +	/*
>> +	 * Seed fs_devices devices_kobj aren't used, fetch kobject from the
>> +	 * fs_info::fs_devices.
>> +	 */
>> +	devices_kobj = device->fs_info->fs_devices->devices_kobj;
>> +	ASSERT(devices_kobj);
>>   
>> -	if (one_device) {
>> -		if (one_device->bdev) {
>> -			disk = one_device->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -			sysfs_remove_link(fs_devices->devices_kobj,
>> -					  disk_kobj->name);
>> -		}
>> +	if (device->bdev) {
>> +		disk = device->bdev->bd_part;
>> +		disk_kobj = &part_to_dev(disk)->kobj;
>> +		sysfs_remove_link(devices_kobj, disk_kobj->name);
>> +	}
>>   
>> -		kobject_del(&one_device->devid_kobj);
>> -		kobject_put(&one_device->devid_kobj);
>> +	kobject_del(&device->devid_kobj);
>> +	kobject_put(&device->devid_kobj);
>>   
>> -		wait_for_completion(&one_device->kobj_unregister);
>> +	wait_for_completion(&device->kobj_unregister);
>> +}
>>   
>> +/* when 2nd argument device is NULL, it removes all devices link */
>> +int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
>> +				   struct btrfs_device *device)
>> +{
>> +	struct btrfs_fs_devices *seed_fs_devices;
>> +
>> +	if (device) {
>> +		btrfs_sysfs_remove_device(device);
>>   		return 0;
>>   	}
>>   
>> -	list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
>> -
>> -		if (one_device->bdev) {
>> -			disk = one_device->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -			sysfs_remove_link(fs_devices->devices_kobj,
>> -					  disk_kobj->name);
>> -		}
>> -		kobject_del(&one_device->devid_kobj);
>> -		kobject_put(&one_device->devid_kobj);
>> +	list_for_each_entry(device, &fs_devices->devices, dev_list)
>> +		btrfs_sysfs_remove_device(device);
>>   
>> -		wait_for_completion(&one_device->kobj_unregister);
>> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
>> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list)
>> +			btrfs_sysfs_remove_device(device);
>>   	}
>>   
>>   	return 0;
>> @@ -1271,44 +1274,81 @@ static struct kobj_type devid_ktype = {
>>   	.release	= btrfs_release_devid_kobj,
>>   };
>>   
>> +static int btrfs_sysfs_add_device(struct btrfs_device *device)
>> +{
>> +	int ret;
>> +	struct kobject *devices_kobj;
>> +        struct kobject *devinfo_kobj;
>> +
>> +	/*
>> +	 * make sure we use the fs_info::fs_devices to fetch the kobjects
>> +	 * even for the seed fs_devices
>> +	 */
>> +	devices_kobj = device->fs_devices->fs_info->fs_devices->devices_kobj;
>> +	devinfo_kobj = device->fs_devices->fs_info->fs_devices->devinfo_kobj;
>> +	ASSERT(devices_kobj);
>> +	ASSERT(devinfo_kobj);
>> +
>> +	if (device->bdev) {
>> +		struct hd_struct *disk;
>> +		struct kobject *disk_kobj;
>> +
>> +		disk = device->bdev->bd_part;
>> +		disk_kobj = &part_to_dev(disk)->kobj;
>> +
>> +		ret = sysfs_create_link(devices_kobj, disk_kobj,
>> +					disk_kobj->name);
>> +		if (ret) {
>> +			btrfs_warn(device->fs_info,
>> +			   "sysfs create device link failed %d devid %llu",
>> +				   ret, device->devid);
>> +			return ret;
>> +		}
>> +	}
>> +
>> +	init_completion(&device->kobj_unregister);
>> +	ret = kobject_init_and_add(&device->devid_kobj, &devid_ktype,
>> +				   devinfo_kobj, "%llu", device->devid);
>> +	if (ret) {
>> +		kobject_put(&device->devid_kobj);
>> +		btrfs_warn(device->fs_info,
>> +			   "sysfs devinfo init failed %d devid %llu",
>> +			   ret, device->devid);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>>   int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
>> -				struct btrfs_device *one_device)
>> +				struct btrfs_device *device)
>>   {
>> -	int error = 0;
>> -	struct btrfs_device *dev;
>> +	int ret = 0;
>>   	unsigned int nofs_flag;
>> +	struct btrfs_fs_devices *seed_fs_devices;
>>   
>>   	nofs_flag = memalloc_nofs_save();
>> -	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
>>   
>> -		if (one_device && one_device != dev)
>> -			continue;
>> +	if (device)
>> +		return btrfs_sysfs_add_device(device);
>>   
>> -		if (dev->bdev) {
>> -			struct hd_struct *disk;
>> -			struct kobject *disk_kobj;
>> -
>> -			disk = dev->bdev->bd_part;
>> -			disk_kobj = &part_to_dev(disk)->kobj;
>> -
>> -			error = sysfs_create_link(fs_devices->devices_kobj,
>> -						  disk_kobj, disk_kobj->name);
>> -			if (error)
>> -				break;
>> -		}
>> +	list_for_each_entry(device, &fs_devices->devices, dev_list) {
>> +		ret = btrfs_sysfs_add_device(device);
>> +		if (ret)
>> +			goto out;
>> +	}
>>   
>> -		init_completion(&dev->kobj_unregister);
>> -		error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
>> -					     fs_devices->devinfo_kobj, "%llu",
>> -					     dev->devid);
>> -		if (error) {
>> -			kobject_put(&dev->devid_kobj);
>> -			break;
>> +	list_for_each_entry(seed_fs_devices, &fs_devices->seed_list, seed_list) {
>> +		list_for_each_entry(device, &seed_fs_devices->devices, dev_list) {
>> +			ret = btrfs_sysfs_add_device(device);
>> +			if (ret)
>> +				goto out;
>>   		}
>>   	}
>> +
>> +out:
>>   	memalloc_nofs_restore(nofs_flag);
>>   
>> -	return error;
>> +	return ret;
>>   }
>>   
>>   void btrfs_kobject_uevent(struct block_device *bdev, enum kobject_action action)
>>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] btrfs: reada: use sprout device_list_mutex
  2020-08-30 14:40 ` [PATCH 04/11] btrfs: reada: use sprout device_list_mutex Anand Jain
  2020-08-31  8:54   ` Nikolay Borisov
@ 2020-08-31 16:08   ` Josef Bacik
  2020-09-01  9:02     ` Anand Jain
  1 sibling, 1 reply; 38+ messages in thread
From: Josef Bacik @ 2020-08-31 16:08 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba

On 8/30/20 10:40 AM, Anand Jain wrote:
> On an fs mounted using a sprout-device, the seed fs_devices are maintained
> in a linked list under fs_info->fs_devices. Each seed's fs_devices also
> have device_list_mutex initialized to protect against the potential race
> with delete threads. But the delete thread (at btrfs_rm_device()) is holding
> the fs_info::fs_devices::device_list_mutex mutex which is sprout's
> device_list_mutex instead of seed's device_list_mutex. Moreover, there
> aren't any significient benefits in using the seed::device_list_mutex
> instead of sprout::device_list_mutex.
> 
> So this patch converts them of using the seed::device_list_mutex to
> sprout::device_list_mutex.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>

This doesn't apply cleanly to misc-next as of this morning.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device
  2020-08-30 14:40 ` [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device Anand Jain
  2020-08-31  9:07   ` Nikolay Borisov
@ 2020-08-31 16:21   ` Josef Bacik
  2020-09-01 16:16     ` Anand Jain
  1 sibling, 1 reply; 38+ messages in thread
From: Josef Bacik @ 2020-08-31 16:21 UTC (permalink / raw)
  To: Anand Jain, linux-btrfs; +Cc: dsterba

On 8/30/20 10:40 AM, Anand Jain wrote:
> The following test case leads to null kobject-being-freed error.
> 
>   mount seed /mnt
>   add sprout to /mnt
>   umount /mnt
>   mount sprout to /mnt
>   delete seed
> 
>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>   RIP: 0010:kobject_put+0x80/0x350
>   ::
>   Call Trace:
>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>   ksys_ioctl+0xe2/0x140
>   __x64_sys_ioctl+0x1e/0x29
>   do_syscall_64+0x96/0x150
>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>   RIP: 0033:0x7f4047c6288b
>   ::
> 
> This is because, at the end of the seed device-delete, we try to remove
> the seed's devid sysfs entry. But for the seed devices under the sprout
> fs, we don't initialize the devid kobject yet. So this patch initializes
> the seed device devid kobject and the device link in the sysfs. This takes
> care of the Warning.
> 
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---

Ok again, this will not work.

Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, and 
fs_info->fs_devices->devices_kobj is what is initialized.

Now you sprout.

This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over 
fs_fdevices->devices_kobj.  Now we take this clone device, and set 
fs_info->fsdevices to the cloned fs_devices, and we add the original fs_devices, 
which had the sysfs objects init'ed already, to fs_devices->seed_list.

It appears to me you are completely ignoring this aspect of sprout.  Maybe I'm 
missing something, but I've gone through this code twice now to see if what I 
think is happening is actually happening, and I'm convinced this is wrong.

If it's _not_ wrong, and I _am_ missing something, then you need to explain why 
I'm wrong, and then go back and fix what needs to be fixed to make this whole 
process more clear, and _then_ you can do this patch series.  Thanks,

Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 05/11] btrfs: btrfs_init_devices_late: use sprout device_list_mutex
  2020-08-31  8:37   ` Nikolay Borisov
@ 2020-09-01  8:54     ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-09-01  8:54 UTC (permalink / raw)
  To: Nikolay Borisov, linux-btrfs; +Cc: dsterba, josef



On 31/8/20 4:37 pm, Nikolay Borisov wrote:
> 
> 
> On 30.08.20 г. 17:41 ч., Anand Jain wrote:
>> In a mounted sprout FS, all threads now are using the
>> sprout::device_list_mutex, and this is the only piece of code using
>> the seed::device_list_mutex. This patch converts to use the sprouts
>> fs_info->fs_devices->device_list_mutex.
>>
>> The same reasoning holds true here, that device delete is holding
>> the sprout::device_list_mutex.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
>>   fs/btrfs/volumes.c | 4 +---
>>   1 file changed, 1 insertion(+), 3 deletions(-)
>>
>> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
>> index 9921b43ef839..7639a048c6cf 100644
>> --- a/fs/btrfs/volumes.c
>> +++ b/fs/btrfs/volumes.c
>> @@ -7184,16 +7184,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
>>   	mutex_lock(&fs_devices->device_list_mutex);
>>   	list_for_each_entry(device, &fs_devices->devices, dev_list)
>>   		device->fs_info = fs_info;
>> -	mutex_unlock(&fs_devices->device_list_mutex);
>>   
>>   	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
>> -		mutex_lock(&seed_devs->device_list_mutex);
>>   		list_for_each_entry(device, &seed_devs->devices, dev_list)
>>   			device->fs_info = fs_info;
>> -		mutex_unlock(&seed_devs->device_list_mutex);
>>   
>>   		seed_devs->fs_info = fs_info;
>>   	}
>> +	mutex_unlock(&fs_devices->device_list_mutex);
> 
> Instead of doing the looping here to set fs_info I think it makes more
> sense to move the initialization of fs_info for the seed fs_info/seed
> devices to open_seed_devices.
> 

  But that will be out of the purpose of this patch. May be in
  a different patch.

  And seed_devs->fs_info = fs_info co-exists with the
  device->fs_info = fs_info as well.


> As a matter of fact at the point where btrfs_init_devices_late is called
> btrfs_read_chunk_tree would have already been called which would have
> resulted in all present seed devices be added to fs_info::seed_list. So
> acquiring the lock here serves no purpose really.

  I tried to tests few times before. If any of the user initiated device
  operation could race with the mount thread. The ans was no. So the
  lock is not really required. But then there was mount and unmount
  racing bug, which was very strange to me. With that bug, if unmount
  wins the race, user initiated forget thread may free the devices. Now
  its fixed. So should be safe. But I am not too sure if that covers all
  the threads that could potentially race.

  So for now, its ok to consolidate the lock to
  sprout::device_list_mutex at least.

Thanks, Anand

> 
>>   }
>>   
>>   static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
>>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 04/11] btrfs: reada: use sprout device_list_mutex
  2020-08-31 16:08   ` Josef Bacik
@ 2020-09-01  9:02     ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-09-01  9:02 UTC (permalink / raw)
  To: Josef Bacik, linux-btrfs; +Cc: dsterba



On 1/9/20 12:08 am, Josef Bacik wrote:
> On 8/30/20 10:40 AM, Anand Jain wrote:
>> On an fs mounted using a sprout-device, the seed fs_devices are 
>> maintained
>> in a linked list under fs_info->fs_devices. Each seed's fs_devices also
>> have device_list_mutex initialized to protect against the potential race
>> with delete threads. But the delete thread (at btrfs_rm_device()) is 
>> holding
>> the fs_info::fs_devices::device_list_mutex mutex which is sprout's
>> device_list_mutex instead of seed's device_list_mutex. Moreover, there
>> aren't any significient benefits in using the seed::device_list_mutex
>> instead of sprout::device_list_mutex.
>>
>> So this patch converts them of using the seed::device_list_mutex to
>> sprout::device_list_mutex.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> 
> This doesn't apply cleanly to misc-next as of this morning.  Thanks,

   Kdave renamed the function to reada_start_for_fsdevs() dropping
   the __ prefix. Rebased my misc-next. I am sending V2.

Thanks, Anand

> 
> Josef

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device
  2020-08-31 16:21   ` Josef Bacik
@ 2020-09-01 16:16     ` Anand Jain
  0 siblings, 0 replies; 38+ messages in thread
From: Anand Jain @ 2020-09-01 16:16 UTC (permalink / raw)
  To: Josef Bacik, linux-btrfs; +Cc: dsterba



On 1/9/20 12:21 am, Josef Bacik wrote:
> On 8/30/20 10:40 AM, Anand Jain wrote:
>> The following test case leads to null kobject-being-freed error.
>>
>>   mount seed /mnt
>>   add sprout to /mnt
>>   umount /mnt
>>   mount sprout to /mnt
>>   delete seed
>>
>>   kobject: '(null)' (00000000dd2b87e4): is not initialized, yet 
>> kobject_put() is being called.
>>   WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
>>   RIP: 0010:kobject_put+0x80/0x350
>>   ::
>>   Call Trace:
>>   btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
>>   btrfs_rm_device.cold+0xa8/0x298 [btrfs]
>>   btrfs_ioctl+0x206c/0x22a0 [btrfs]
>>   ksys_ioctl+0xe2/0x140
>>   __x64_sys_ioctl+0x1e/0x29
>>   do_syscall_64+0x96/0x150
>>   entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>   RIP: 0033:0x7f4047c6288b
>>   ::
>>
>> This is because, at the end of the seed device-delete, we try to remove
>> the seed's devid sysfs entry. But for the seed devices under the sprout
>> fs, we don't initialize the devid kobject yet. So this patch initializes
>> the seed device devid kobject and the device link in the sysfs. This 
>> takes
>> care of the Warning.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
> 
> Ok again, this will not work.
> 

  This comment was answered here [1] before.
    [1]  https://patchwork.kernel.org/patch/11729425/


  Again let me verify with the boilerplate code [2], which dumps
  btrfs_fs_devices and btrfs_device structures along with its
  memory address.
    [2] https://github.com/asj/bbp.git

> Mount a seed fs, you get fs_info->fs_devices pointed at the seed device, 
> and fs_info->fs_devices->devices_kobj is what is initialized.

Right.

$ mount /dev/sda /btrfs
mount: /btrfs: WARNING: device write-protected, mounted read-only.

Note our fs_devices is at 000000005c8322d4, which is paired with sda.

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]
	fs_devs_addr:		000000005c8322d4 <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000017aba761
		device:		/dev/sda
		devid_kobj:	1


> Now you sprout.


The relevant code snippets are as below.

-----------------------------
2344 static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)

2361         seed_devices = alloc_fs_devices(NULL, NULL);

2371         old_devices = clone_fs_devices(fs_devices);

2377         list_add(&old_devices->fs_list, &fs_uuids);

2379         memcpy(seed_devices, fs_devices, sizeof(*seed_devices));

2386         list_splice_init_rcu(&fs_devices->devices, 
&seed_devices->devices,
2387                               synchronize_rcu);

2396         list_add_tail(&seed_devices->seed_list, 
&fs_devices->seed_list);

2398         generate_random_uuid(fs_devices->fsid);
-----------------------------


> This does clone_fs_devices(fs_info->fs_devices), which doesn't copy over 
> fs_fdevices->devices_kobj.

  Right. Line 2371.

>  Now we take this clone device, and set 
> fs_info->fsdevices to the cloned fs_devices,

  Hm. No we just add it to the fs_uuids. So that user doesn't have
  to scan again to mount the seed device at a different mount point.
  Line 2377.

> and we add the original 
> fs_devices, which had the sysfs objects init'ed already, to 
> fs_devices->seed_list.

  No. Original fs_devices still remains with fs_info->fs_devices.
  Which moves _devices_ _only_  to seed_devices::devices
  Line 2386.

  Lets verify.

  $ btrfs dev add -f /dev/sdb /btrfs

  Our fs_devices was at 000000005c8322d4.

  And after device add, our fs_devices 000000005c8322d4 is pairing with
  the RW device sdb.

  And the original sda is moved under seed_devices.

  Ignore clone_fs_devices its of no use to our conversation here.
  Which goes away after btrfs dev scan --forget as its not mounted
  anymore.

  As shown below.

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]
	fs_devs_addr:		00000000f0c691d1 <-- clone_fs_devices
	fsid_kobj_state:	0
	fsid_kobj_insysfs:	0
	kobj_state:		null
	kobj_insysfs:		null
		dev_addr:	000000006e24ab09
		device:		/dev/sda
		devid_kobj:	null
[fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4]
	fs_devs_addr:		000000005c8322d4 <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000039f25fa1
		device:		/dev/sdb
		devid_kobj:	1
	[[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]]
		sprout_fsid:		91294469-9c0e-45c7-8dc7-af3134ba8cf4
		fs_devs_addr:		000000005e19db25 <-- seed_devices
		fsid_kobj_state:	1
		fsid_kobj_insysfs:	1
		kobj_state:		1
		kobj_insysfs:		1
			dev_addr:	0000000017aba761
			device:		/dev/sda
			devid_kobj:	1


Clone_fs_devices is freed after forget

$ btrfs dev scan --forget

$ cat /proc/fs/btrfs/devlist | egrep "fsid|addr|device:|kobj"
[fsid: 91294469-9c0e-45c7-8dc7-af3134ba8cf4]
	fs_devs_addr:		000000005c8322d4  <-- fs_devices
	fsid_kobj_state:	1
	fsid_kobj_insysfs:	1
	kobj_state:		1
	kobj_insysfs:		1
		dev_addr:	0000000039f25fa1
		device:		/dev/sdb
		devid_kobj:		1
	[[seed_fsid: 938a1ff2-f493-4a9a-9ed3-3d2ace2f0fb2]]
		sprout_fsid:		91294469-9c0e-45c7-8dc7-af3134ba8cf4
		fs_devs_addr:		000000005e19db25 <-- seed_devcies
		fsid_kobj_state:	1
		fsid_kobj_insysfs:	1
		kobj_state:		1
		kobj_insysfs:		1
			dev_addr:	0000000017aba761
			device:		/dev/sda
			devid_kobj:		1



HTH

Anand

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2020-09-01 16:16 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-31  1:38 [PATCH 0/11] btrfs: seed fix null ptr, use only main device_list_mutex, and cleanups Anand Jain
2020-08-21 13:15 ` [PATCH RFC] btrfs/163: replace sprout instead of seed Anand Jain
2020-08-21 13:15   ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for seed device Anand Jain
2020-08-21 13:15     ` [PATCH RFC 2/2] btrfs: fix replace of " Anand Jain
2020-08-21 14:38       ` Josef Bacik
2020-08-23 15:05         ` Anand Jain
2020-08-21 14:36     ` [PATCH 1/2] btrfs: initialize sysfs devid and device link for " Josef Bacik
2020-08-23 13:05       ` Anand Jain
2020-08-29 11:44     ` Anand Jain
2020-08-30 14:41   ` [PATCH] fstests: btrfs/163: replace sprout instead of seed Anand Jain
2020-08-30 14:40 ` Anand Jain
2020-08-30 14:40 ` [PATCH 01/11] btrfs: initialize sysfs devid and device link for seed device Anand Jain
2020-08-31  9:07   ` Nikolay Borisov
2020-08-31 12:00     ` Anand Jain
2020-08-31 16:21   ` Josef Bacik
2020-09-01 16:16     ` Anand Jain
2020-08-30 14:40 ` [PATCH 02/11] btrfs: refactor btrfs_sysfs_add_devices_dir Anand Jain
2020-08-30 14:40 ` [PATCH 03/11] btrfs: refactor btrfs_sysfs_remove_devices_dir Anand Jain
2020-08-31  8:58   ` Nikolay Borisov
2020-08-31  9:12     ` Anand Jain
2020-08-30 14:40 ` [PATCH 04/11] btrfs: reada: use sprout device_list_mutex Anand Jain
2020-08-31  8:54   ` Nikolay Borisov
2020-08-31 16:08   ` Josef Bacik
2020-09-01  9:02     ` Anand Jain
2020-08-30 14:41 ` [PATCH 05/11] btrfs: btrfs_init_devices_late: " Anand Jain
2020-08-31  8:37   ` Nikolay Borisov
2020-09-01  8:54     ` Anand Jain
2020-08-30 14:41 ` [PATCH 06/11] btrfs: open code list_head pointer in btrfs_init_dev_replace_tgtdev Anand Jain
2020-08-31  8:38   ` Nikolay Borisov
2020-08-30 14:41 ` [PATCH 07/11] btrfs: cleanup btrfs_remove_chunk Anand Jain
2020-08-31  8:43   ` Nikolay Borisov
2020-08-30 14:41 ` [PATCH 08/11] btrfs: cleanup btrfs_assign_next_active_device() Anand Jain
2020-08-31  8:44   ` Nikolay Borisov
2020-08-30 14:41 ` [PATCH 09/11] btrfs: cleanup unnecessary goto in open_seed_device Anand Jain
2020-08-31  8:44   ` Nikolay Borisov
2020-08-30 14:41 ` [PATCH 10/11] btrfs: btrfs_dev_replace_update_device_in_mapping_tree drop file global declare Anand Jain
2020-08-31  8:46   ` Nikolay Borisov
2020-08-30 14:41 ` [PATCH 11/11] btrfs: fix replace of seed device Anand Jain

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).