All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check
@ 2022-03-30 10:28 Heming Zhao
  2022-03-30 10:34 ` heming.zhao
  2022-03-31  7:06 ` Guoqing Jiang
  0 siblings, 2 replies; 4+ messages in thread
From: Heming Zhao @ 2022-03-30 10:28 UTC (permalink / raw)
  To: linux-raid, song, guoqing.jiang
  Cc: Heming Zhao, xni, kernel test robot, Dan Carpenter

If bitmap area contains invalid data, kernel will crash then mdadm
triggers "Segmentation fault".
This is cluster-md speical bug. In non-clustered env, mdadm will
handle broken metadata case. In clustered array, only kernel space
handles bitmap slot info. But even this bug only happened in clustered
env, current sanity check is wrong, the code should be changed.

How to trigger: (faulty injection)

dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sda
dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdb
mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb
mdadm -Ss
echo aaa > magic.txt
 == below modifying slot 2 bitmap data ==
dd if=magic.txt of=/dev/sda seek=16384 bs=1 count=3 <== destroy magic
dd if=/dev/zero of=/dev/sda seek=16436 bs=1 count=4 <== ZERO chunksize
mdadm -A /dev/md0 /dev/sda /dev/sdb
 == kernel crashes. mdadm outputs "Segmentation fault" ==

Crash log:

kernel: md: md0 stopped.
kernel: md/raid1:md0: not clean -- starting background reconstruction
kernel: md/raid1:md0: active with 2 out of 2 mirrors
kernel: dlm: ... ...
kernel: md-cluster: Joined cluster 44810aba-38bb-e6b8-daca-bc97a0b254aa slot 1
kernel: md0: invalid bitmap file superblock: bad magic
kernel: md_bitmap_copy_from_slot can't get bitmap from slot 2
kernel: md-cluster: Could not gather bitmaps from slot 2
kernel: divide error: 0000 [#1] SMP NOPTI
kernel: CPU: 0 PID: 1603 Comm: mdadm Not tainted 5.14.6-1-default
kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]
kernel: RSP: 0018:ffffc22ac0843ba0 EFLAGS: 00010246
kernel: ... ...
kernel: Call Trace:
kernel:  ? dlm_lock_sync+0xd0/0xd0 [md_cluster 77fe..7a0]
kernel:  md_bitmap_copy_from_slot+0x2c/0x290 [md_mod 24ea..d3a]
kernel:  load_bitmaps+0xec/0x210 [md_cluster 77fe..7a0]
kernel:  md_bitmap_load+0x81/0x1e0 [md_mod 24ea..d3a]
kernel:  do_md_run+0x30/0x100 [md_mod 24ea..d3a]
kernel:  md_ioctl+0x1290/0x15a0 [md_mod 24ea....d3a]
kernel:  ? mddev_unlock+0xaa/0x130 [md_mod 24ea..d3a]
kernel:  ? blkdev_ioctl+0xb1/0x2b0
kernel:  block_ioctl+0x3b/0x40
kernel:  __x64_sys_ioctl+0x7f/0xb0
kernel:  do_syscall_64+0x59/0x80
kernel:  ? exit_to_user_mode_prepare+0x1ab/0x230
kernel:  ? syscall_exit_to_user_mode+0x18/0x40
kernel:  ? do_syscall_64+0x69/0x80
kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
kernel: RIP: 0033:0x7f4a15fa722b
kernel: ... ...
kernel: ---[ end trace 8afa7612f559c868 ]---
kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
---
v3: * fixed "uninitialized symbol" error which reported by kbuild robot.
v2: * revise commit log
      - change mdadm "FPE" error to "Segmentation fault" error
        ("FPE" belongs to another issue)
      - add kernel crash log
    * modify a comment style to follow code rule
    * change strlcpy to strscpy for strlcpy is marked as deprecated in
      Documentation/process/deprecated.rst
      - note: strlcpy() still exists in md.c & md-cluster.c
---
 drivers/md/md-bitmap.c | 46 ++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index bfd6026d7809..c198a83c9361 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
 	write_behind = le32_to_cpu(sb->write_behind);
 	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
-	/* Setup nodes/clustername only if bitmap version is
-	 * cluster-compatible
-	 */
-	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
-		nodes = le32_to_cpu(sb->nodes);
-		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
-				sb->cluster_name, 64);
-	}
 
 	/* verify that the bitmap-specific fields are valid */
 	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		goto out;
 	}
 
+	/*
+	 * Setup nodes/clustername only if bitmap version is
+	 * cluster-compatible
+	 */
+	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
+		nodes = le32_to_cpu(sb->nodes);
+		strscpy(bitmap->mddev->bitmap_info.cluster_name,
+				sb->cluster_name, 64);
+	}
+
 	/* keep the array size field of the bitmap superblock up to date */
 	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
 
@@ -695,14 +697,14 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
 		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
 	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
+	strscpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
 	err = 0;
 
 out:
 	kunmap_atomic(sb);
-	/* Assigning chunksize is required for "re_read" */
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
 	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
+		/* Assigning chunksize is required for "re_read" */
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
 		err = md_setup_cluster(bitmap->mddev, nodes);
 		if (err) {
 			pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,18 +715,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
 		goto re_read;
 	}
 
-
 out_no_sb:
-	if (test_bit(BITMAP_STALE, &bitmap->flags))
-		bitmap->events_cleared = bitmap->mddev->events;
-	bitmap->mddev->bitmap_info.chunksize = chunksize;
-	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
-	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
-	bitmap->mddev->bitmap_info.nodes = nodes;
-	if (bitmap->mddev->bitmap_info.space == 0 ||
-	    bitmap->mddev->bitmap_info.space > sectors_reserved)
-		bitmap->mddev->bitmap_info.space = sectors_reserved;
-	if (err) {
+	if (err == 0) {
+		if (test_bit(BITMAP_STALE, &bitmap->flags))
+			bitmap->events_cleared = bitmap->mddev->events;
+		bitmap->mddev->bitmap_info.chunksize = chunksize;
+		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
+		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+		bitmap->mddev->bitmap_info.nodes = nodes;
+		if (bitmap->mddev->bitmap_info.space == 0 ||
+			bitmap->mddev->bitmap_info.space > sectors_reserved)
+			bitmap->mddev->bitmap_info.space = sectors_reserved;
+	} else {
 		md_bitmap_print_sb(bitmap);
 		if (bitmap->cluster_slot < 0)
 			md_cluster_stop(bitmap->mddev);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check
  2022-03-30 10:28 [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check Heming Zhao
@ 2022-03-30 10:34 ` heming.zhao
  2022-03-31  7:06 ` Guoqing Jiang
  1 sibling, 0 replies; 4+ messages in thread
From: heming.zhao @ 2022-03-30 10:34 UTC (permalink / raw)
  To: linux-raid, song, guoqing.jiang; +Cc: xni

sorry for the mistake in v2 patch.
I had checked codes & tested module ko many times before sending v3.

- Heming

On 3/30/22 18:28, Heming Zhao wrote:
> If bitmap area contains invalid data, kernel will crash then mdadm
> triggers "Segmentation fault".
> This is cluster-md speical bug. In non-clustered env, mdadm will
> handle broken metadata case. In clustered array, only kernel space
> handles bitmap slot info. But even this bug only happened in clustered
> env, current sanity check is wrong, the code should be changed.
>


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check
  2022-03-30 10:28 [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check Heming Zhao
  2022-03-30 10:34 ` heming.zhao
@ 2022-03-31  7:06 ` Guoqing Jiang
  2022-03-31  8:24   ` heming.zhao
  1 sibling, 1 reply; 4+ messages in thread
From: Guoqing Jiang @ 2022-03-31  7:06 UTC (permalink / raw)
  To: Heming Zhao, linux-raid, song; +Cc: xni, kernel test robot, Dan Carpenter



On 3/30/22 6:28 PM, Heming Zhao wrote:
> If bitmap area contains invalid data, kernel will crash then mdadm
> triggers "Segmentation fault".
> This is cluster-md speical bug. In non-clustered env, mdadm will
> handle broken metadata case. In clustered array, only kernel space
> handles bitmap slot info. But even this bug only happened in clustered
> env, current sanity check is wrong, the code should be changed.
>
> How to trigger: (faulty injection)
>
> dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sda
> dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdb
> mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb
> mdadm -Ss
> echo aaa > magic.txt
>   == below modifying slot 2 bitmap data ==
> dd if=magic.txt of=/dev/sda seek=16384 bs=1 count=3 <== destroy magic
> dd if=/dev/zero of=/dev/sda seek=16436 bs=1 count=4 <== ZERO chunksize
> mdadm -A /dev/md0 /dev/sda /dev/sdb
>   == kernel crashes. mdadm outputs "Segmentation fault" ==
>
> Crash log:
>
> kernel: md: md0 stopped.
> kernel: md/raid1:md0: not clean -- starting background reconstruction
> kernel: md/raid1:md0: active with 2 out of 2 mirrors
> kernel: dlm: ... ...
> kernel: md-cluster: Joined cluster 44810aba-38bb-e6b8-daca-bc97a0b254aa slot 1
> kernel: md0: invalid bitmap file superblock: bad magic
> kernel: md_bitmap_copy_from_slot can't get bitmap from slot 2
> kernel: md-cluster: Could not gather bitmaps from slot 2
> kernel: divide error: 0000 [#1] SMP NOPTI
> kernel: CPU: 0 PID: 1603 Comm: mdadm Not tainted 5.14.6-1-default
> kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
> kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]
> kernel: RSP: 0018:ffffc22ac0843ba0 EFLAGS: 00010246
> kernel: ... ...
> kernel: Call Trace:
> kernel:  ? dlm_lock_sync+0xd0/0xd0 [md_cluster 77fe..7a0]
> kernel:  md_bitmap_copy_from_slot+0x2c/0x290 [md_mod 24ea..d3a]
> kernel:  load_bitmaps+0xec/0x210 [md_cluster 77fe..7a0]
> kernel:  md_bitmap_load+0x81/0x1e0 [md_mod 24ea..d3a]
> kernel:  do_md_run+0x30/0x100 [md_mod 24ea..d3a]
> kernel:  md_ioctl+0x1290/0x15a0 [md_mod 24ea....d3a]
> kernel:  ? mddev_unlock+0xaa/0x130 [md_mod 24ea..d3a]
> kernel:  ? blkdev_ioctl+0xb1/0x2b0
> kernel:  block_ioctl+0x3b/0x40
> kernel:  __x64_sys_ioctl+0x7f/0xb0
> kernel:  do_syscall_64+0x59/0x80
> kernel:  ? exit_to_user_mode_prepare+0x1ab/0x230
> kernel:  ? syscall_exit_to_user_mode+0x18/0x40
> kernel:  ? do_syscall_64+0x69/0x80
> kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
> kernel: RIP: 0033:0x7f4a15fa722b
> kernel: ... ...
> kernel: ---[ end trace 8afa7612f559c868 ]---
> kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]

l *md_bitmap_create+0x1d1
0x3a81 is in md_bitmap_create (drivers/md/md-bitmap.c:609).
604     re_read:
605             /* If cluster_slot is set, the cluster is setup */
606             if (bitmap->cluster_slot >= 0) {
607                     sector_t bm_blocks = 
bitmap->mddev->resync_max_sectors;
608
609                     bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
610 (bitmap->mddev->bitmap_info.chunksize >> 9));

Please add something to header like "because the chunksize is zero, which
caused the kernel crash".

> Reported-by: kernel test robot <lkp@intel.com>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> ---
> v3: * fixed "uninitialized symbol" error which reported by kbuild robot.
> v2: * revise commit log
>        - change mdadm "FPE" error to "Segmentation fault" error
>          ("FPE" belongs to another issue)
>        - add kernel crash log
>      * modify a comment style to follow code rule
>      * change strlcpy to strscpy for strlcpy is marked as deprecated in
>        Documentation/process/deprecated.rst
>        - note: strlcpy() still exists in md.c & md-cluster.c
> ---
>   drivers/md/md-bitmap.c | 46 ++++++++++++++++++++++--------------------
>   1 file changed, 24 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
> index bfd6026d7809..c198a83c9361 100644
> --- a/drivers/md/md-bitmap.c
> +++ b/drivers/md/md-bitmap.c
> @@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>   	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
>   	write_behind = le32_to_cpu(sb->write_behind);
>   	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
> -	/* Setup nodes/clustername only if bitmap version is
> -	 * cluster-compatible
> -	 */
> -	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
> -		nodes = le32_to_cpu(sb->nodes);
> -		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
> -				sb->cluster_name, 64);
> -	}
>   
>   	/* verify that the bitmap-specific fields are valid */
>   	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
> @@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>   		goto out;
>   	}
>   
> +	/*
> +	 * Setup nodes/clustername only if bitmap version is
> +	 * cluster-compatible
> +	 */
> +	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
> +		nodes = le32_to_cpu(sb->nodes);
> +		strscpy(bitmap->mddev->bitmap_info.cluster_name,
> +				sb->cluster_name, 64);
> +	}
> +
>   	/* keep the array size field of the bitmap superblock up to date */
>   	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
>   
> @@ -695,14 +697,14 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>   	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
>   		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
>   	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
> -	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
> +	strscpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);

I feel we don't need copy cluster_name twice, pls double check and send 
additional
patch to remove one "strscpy(*cluster_name* )" if my feeling is correct.

>   	err = 0;
>   
>   out:
>   	kunmap_atomic(sb);
> -	/* Assigning chunksize is required for "re_read" */
> -	bitmap->mddev->bitmap_info.chunksize = chunksize;
>   	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
> +		/* Assigning chunksize is required for "re_read" */
> +		bitmap->mddev->bitmap_info.chunksize = chunksize;
>   		err = md_setup_cluster(bitmap->mddev, nodes);
>   		if (err) {
>   			pr_warn("%s: Could not setup cluster service (%d)\n",
> @@ -713,18 +715,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>   		goto re_read;
>   	}
>   
> -
>   out_no_sb:
> -	if (test_bit(BITMAP_STALE, &bitmap->flags))
> -		bitmap->events_cleared = bitmap->mddev->events;
> -	bitmap->mddev->bitmap_info.chunksize = chunksize;
> -	bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
> -	bitmap->mddev->bitmap_info.max_write_behind = write_behind;
> -	bitmap->mddev->bitmap_info.nodes = nodes;
> -	if (bitmap->mddev->bitmap_info.space == 0 ||
> -	    bitmap->mddev->bitmap_info.space > sectors_reserved)
> -		bitmap->mddev->bitmap_info.space = sectors_reserved;
> -	if (err) {
> +	if (err == 0) {
> +		if (test_bit(BITMAP_STALE, &bitmap->flags))
> +			bitmap->events_cleared = bitmap->mddev->events;
> +		bitmap->mddev->bitmap_info.chunksize = chunksize;
> +		bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
> +		bitmap->mddev->bitmap_info.max_write_behind = write_behind;
> +		bitmap->mddev->bitmap_info.nodes = nodes;
> +		if (bitmap->mddev->bitmap_info.space == 0 ||
> +			bitmap->mddev->bitmap_info.space > sectors_reserved)
> +			bitmap->mddev->bitmap_info.space = sectors_reserved;
> +	} else {
>   		md_bitmap_print_sb(bitmap);
>   		if (bitmap->cluster_slot < 0)
>   			md_cluster_stop(bitmap->mddev);

Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check
  2022-03-31  7:06 ` Guoqing Jiang
@ 2022-03-31  8:24   ` heming.zhao
  0 siblings, 0 replies; 4+ messages in thread
From: heming.zhao @ 2022-03-31  8:24 UTC (permalink / raw)
  To: Guoqing Jiang, linux-raid, song; +Cc: xni, kernel test robot, Dan Carpenter

On 3/31/22 15:06, Guoqing Jiang wrote:
> 
> 
> On 3/30/22 6:28 PM, Heming Zhao wrote:
>> If bitmap area contains invalid data, kernel will crash then mdadm
>> triggers "Segmentation fault".
>> This is cluster-md speical bug. In non-clustered env, mdadm will
>> handle broken metadata case. In clustered array, only kernel space
>> handles bitmap slot info. But even this bug only happened in clustered
>> env, current sanity check is wrong, the code should be changed.
>>
>> How to trigger: (faulty injection)
>>
>> dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sda
>> dd if=/dev/zero bs=1M count=1 oflag=direct of=/dev/sdb
>> mdadm -C /dev/md0 -b clustered -e 1.2 -n 2 -l mirror /dev/sda /dev/sdb
>> mdadm -Ss
>> echo aaa > magic.txt
>>   == below modifying slot 2 bitmap data ==
>> dd if=magic.txt of=/dev/sda seek=16384 bs=1 count=3 <== destroy magic
>> dd if=/dev/zero of=/dev/sda seek=16436 bs=1 count=4 <== ZERO chunksize
>> mdadm -A /dev/md0 /dev/sda /dev/sdb
>>   == kernel crashes. mdadm outputs "Segmentation fault" ==
>>
>> Crash log:
>>
>> kernel: md: md0 stopped.
>> kernel: md/raid1:md0: not clean -- starting background reconstruction
>> kernel: md/raid1:md0: active with 2 out of 2 mirrors
>> kernel: dlm: ... ...
>> kernel: md-cluster: Joined cluster 44810aba-38bb-e6b8-daca-bc97a0b254aa slot 1
>> kernel: md0: invalid bitmap file superblock: bad magic
>> kernel: md_bitmap_copy_from_slot can't get bitmap from slot 2
>> kernel: md-cluster: Could not gather bitmaps from slot 2
>> kernel: divide error: 0000 [#1] SMP NOPTI
>> kernel: CPU: 0 PID: 1603 Comm: mdadm Not tainted 5.14.6-1-default
>> kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
>> kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]
>> kernel: RSP: 0018:ffffc22ac0843ba0 EFLAGS: 00010246
>> kernel: ... ...
>> kernel: Call Trace:
>> kernel:  ? dlm_lock_sync+0xd0/0xd0 [md_cluster 77fe..7a0]
>> kernel:  md_bitmap_copy_from_slot+0x2c/0x290 [md_mod 24ea..d3a]
>> kernel:  load_bitmaps+0xec/0x210 [md_cluster 77fe..7a0]
>> kernel:  md_bitmap_load+0x81/0x1e0 [md_mod 24ea..d3a]
>> kernel:  do_md_run+0x30/0x100 [md_mod 24ea..d3a]
>> kernel:  md_ioctl+0x1290/0x15a0 [md_mod 24ea....d3a]
>> kernel:  ? mddev_unlock+0xaa/0x130 [md_mod 24ea..d3a]
>> kernel:  ? blkdev_ioctl+0xb1/0x2b0
>> kernel:  block_ioctl+0x3b/0x40
>> kernel:  __x64_sys_ioctl+0x7f/0xb0
>> kernel:  do_syscall_64+0x59/0x80
>> kernel:  ? exit_to_user_mode_prepare+0x1ab/0x230
>> kernel:  ? syscall_exit_to_user_mode+0x18/0x40
>> kernel:  ? do_syscall_64+0x69/0x80
>> kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
>> kernel: RIP: 0033:0x7f4a15fa722b
>> kernel: ... ...
>> kernel: ---[ end trace 8afa7612f559c868 ]---
>> kernel: RIP: 0010:md_bitmap_create+0x1d1/0x850 [md_mod]
> 
> l *md_bitmap_create+0x1d1
> 0x3a81 is in md_bitmap_create (drivers/md/md-bitmap.c:609).
> 604     re_read:
> 605             /* If cluster_slot is set, the cluster is setup */
> 606             if (bitmap->cluster_slot >= 0) {
> 607                     sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
> 608
> 609                     bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks,
> 610 (bitmap->mddev->bitmap_info.chunksize >> 9));
> 
> Please add something to header like "because the chunksize is zero, which
> caused the kernel crash".

OK

> 
>> Reported-by: kernel test robot <lkp@intel.com>
>> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
>> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
>> ---
>> v3: * fixed "uninitialized symbol" error which reported by kbuild robot.
>> v2: * revise commit log
>>        - change mdadm "FPE" error to "Segmentation fault" error
>>          ("FPE" belongs to another issue)
>>        - add kernel crash log
>>      * modify a comment style to follow code rule
>>      * change strlcpy to strscpy for strlcpy is marked as deprecated in
>>        Documentation/process/deprecated.rst
>>        - note: strlcpy() still exists in md.c & md-cluster.c
>> ---
>>   drivers/md/md-bitmap.c | 46 ++++++++++++++++++++++--------------------
>>   1 file changed, 24 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
>> index bfd6026d7809..c198a83c9361 100644
>> --- a/drivers/md/md-bitmap.c
>> +++ b/drivers/md/md-bitmap.c
>> @@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>>       daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
>>       write_behind = le32_to_cpu(sb->write_behind);
>>       sectors_reserved = le32_to_cpu(sb->sectors_reserved);
>> -    /* Setup nodes/clustername only if bitmap version is
>> -     * cluster-compatible
>> -     */
>> -    if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
>> -        nodes = le32_to_cpu(sb->nodes);
>> -        strlcpy(bitmap->mddev->bitmap_info.cluster_name,
>> -                sb->cluster_name, 64);
>> -    }
>>       /* verify that the bitmap-specific fields are valid */
>>       if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
>> @@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>>           goto out;
>>       }
>> +    /*
>> +     * Setup nodes/clustername only if bitmap version is
>> +     * cluster-compatible
>> +     */
>> +    if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
>> +        nodes = le32_to_cpu(sb->nodes);
>> +        strscpy(bitmap->mddev->bitmap_info.cluster_name,
>> +                sb->cluster_name, 64);
>> +    }
>> +
>>       /* keep the array size field of the bitmap superblock up to date */
>>       sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
>> @@ -695,14 +697,14 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>>       if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
>>           set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
>>       bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
>> -    strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
>> +    strscpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
> 
> I feel we don't need copy cluster_name twice, pls double check and send additional
> patch to remove one "strscpy(*cluster_name* )" if my feeling is correct.

cf921cc19cf7c ("Add node recovery callbacks") introduced the first usage of strlcpy().



b97e92574c0bf ("Use separate bitmaps for each nodes in the cluster") introduced the second strlcpy().
from this time, the two strlcpy() were same, we can remove anyone of them safely. But see next commit



d3b178adb3a3a ("md: Skip cluster setup for dm-raid") added dm-raid special handling.

and the "nodes" value is the key of this patch. but from this patch, strlcpy() which was introduced
by b97e92574c0bf become necessary.



3c462c880b52a ("md: Increment version for clustered bitmaps") used clustered major version to only
handle in clustered env. this patch could look a polish for clustered code.



(my v3 patch changed strlcpy() to strscpy() for deprecated API.)

so strlcpy() from cf921cc19cf7c became useless after d3b178adb3a3a, we could remove it safely.

IMO, it makes sense to remove the duplicated strcpy().
I will send two patches for v4:
- for the new comment
- for strlcpy => strscpy & removing duplicate.

Thanks for your great comments

- Heming

> 
>>       err = 0;
>>   out:
>>       kunmap_atomic(sb);
>> -    /* Assigning chunksize is required for "re_read" */
>> -    bitmap->mddev->bitmap_info.chunksize = chunksize;
>>       if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
>> +        /* Assigning chunksize is required for "re_read" */
>> +        bitmap->mddev->bitmap_info.chunksize = chunksize;
>>           err = md_setup_cluster(bitmap->mddev, nodes);
>>           if (err) {
>>               pr_warn("%s: Could not setup cluster service (%d)\n",
>> @@ -713,18 +715,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
>>           goto re_read;
>>       }
>> -
>>   out_no_sb:
>> -    if (test_bit(BITMAP_STALE, &bitmap->flags))
>> -        bitmap->events_cleared = bitmap->mddev->events;
>> -    bitmap->mddev->bitmap_info.chunksize = chunksize;
>> -    bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
>> -    bitmap->mddev->bitmap_info.max_write_behind = write_behind;
>> -    bitmap->mddev->bitmap_info.nodes = nodes;
>> -    if (bitmap->mddev->bitmap_info.space == 0 ||
>> -        bitmap->mddev->bitmap_info.space > sectors_reserved)
>> -        bitmap->mddev->bitmap_info.space = sectors_reserved;
>> -    if (err) {
>> +    if (err == 0) {
>> +        if (test_bit(BITMAP_STALE, &bitmap->flags))
>> +            bitmap->events_cleared = bitmap->mddev->events;
>> +        bitmap->mddev->bitmap_info.chunksize = chunksize;
>> +        bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
>> +        bitmap->mddev->bitmap_info.max_write_behind = write_behind;
>> +        bitmap->mddev->bitmap_info.nodes = nodes;
>> +        if (bitmap->mddev->bitmap_info.space == 0 ||
>> +            bitmap->mddev->bitmap_info.space > sectors_reserved)
>> +            bitmap->mddev->bitmap_info.space = sectors_reserved;
>> +    } else {
>>           md_bitmap_print_sb(bitmap);
>>           if (bitmap->cluster_slot < 0)
>>               md_cluster_stop(bitmap->mddev);
> 
> Acked-by: Guoqing Jiang <guoqing.jiang@linux.dev>
> 
> Thanks,
> Guoqing
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-03-31  8:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-30 10:28 [PATCH v3] md/bitmap: don't set sb values if can't pass sanity check Heming Zhao
2022-03-30 10:34 ` heming.zhao
2022-03-31  7:06 ` Guoqing Jiang
2022-03-31  8:24   ` heming.zhao

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.