All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
@ 2023-04-17 14:01 Li Xiao Keng
  2023-04-23  1:30 ` Li Xiao Keng
  2023-05-08 13:35 ` Martin Wilck
  0 siblings, 2 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-04-17 14:01 UTC (permalink / raw)
  To: jes, mwilck, pmenzel, colyli, linux-raid
  Cc: miaoguanqin, louhongxiang, Li Xiao Keng

When we add a new disk to a raid, it may return -EBUSY.

The main process of --add:
1. dev_open
2. store_super1(st, di->fd) in write_init_super1
3. fsync(di->fd) in write_init_super1
4. close(di->fd)
5. ioctl(ADD_NEW_DISK)

However, there will be some udev(change) event after step4. Then
"/usr/sbin/mdadm --incremental ..." will be run, and the new disk
will be add to md device. After that, ioctl will return -EBUSY.

Here we add map_lock before write_init_super in "mdadm --add"
to fix this race.

Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
---
 Assemble.c |  5 ++++-
 Manage.c   | 25 +++++++++++++++++--------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/Assemble.c b/Assemble.c
index 49804941..086890ed 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1479,8 +1479,11 @@ try_again:
 	 * to our list.  We flag them so that we don't try to re-add,
 	 * but can remove if they turn out to not be wanted.
 	 */
-	if (map_lock(&map))
+	if (map_lock(&map)) {
 		pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
+		return 1;
+	}
+
 	if (c->update == UOPT_UUID)
 		mp = NULL;
 	else
diff --git a/Manage.c b/Manage.c
index f54de7c6..6a101bae 100644
--- a/Manage.c
+++ b/Manage.c
@@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	struct supertype *dev_st;
 	int j;
 	mdu_disk_info_t disc;
+	struct map_ent *map = NULL;
 
 	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
 		if (dv->disposition == 'M')
@@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 		disc.raid_disk = 0;
 	}
 
+	if (map_lock(&map)) {
+		pr_err("failed to get exclusive lock on mapfile when add disk\n");
+		return -1;
+	}
 	if (array->not_persistent==0) {
 		int dfd;
 		if (dv->disposition == 'j')
@@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
 		if (tst->ss->add_to_super(tst, &disc, dfd,
 					  dv->devname, INVALID_SECTORS))
-			return -1;
+			goto unlock;
 		if (tst->ss->write_init_super(tst))
-			return -1;
+			goto unlock;
 	} else if (dv->disposition == 'A') {
 		/*  this had better be raid1.
 		 * As we are "--re-add"ing we must find a spare slot
@@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			pr_err("add failed for %s: could not get exclusive access to container\n",
 			       dv->devname);
 			tst->ss->free_super(tst);
-			return -1;
+			goto unlock;
 		}
 
 		/* Check if metadata handler is able to accept the drive */
 		if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
 		    0, 0, dv->devname, NULL, 0, 1)) {
 			close(container_fd);
-			return -1;
+			goto unlock;
 		}
 
 		Kill(dv->devname, NULL, 0, -1, 0);
@@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 					  dv->devname, INVALID_SECTORS)) {
 			close(dfd);
 			close(container_fd);
-			return -1;
+			goto unlock;
 		}
 		if (!mdmon_running(tst->container_devnm))
 			tst->ss->sync_metadata(tst);
@@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			       dv->devname);
 			close(container_fd);
 			tst->ss->free_super(tst);
-			return -1;
+			goto unlock;
 		}
 		sra->array.level = LEVEL_CONTAINER;
 		/* Need to set data_offset and component_size */
@@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			pr_err("add new device to external metadata failed for %s\n", dv->devname);
 			close(container_fd);
 			sysfs_free(sra);
-			return -1;
+			goto unlock;
 		}
 		ping_monitor(devnm);
 		sysfs_free(sra);
@@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			else
 				pr_err("add new device failed for %s as %d: %s\n",
 				       dv->devname, j, strerror(errno));
-			return -1;
+			goto unlock;
 		}
 		if (dv->disposition == 'j') {
 			pr_err("Journal added successfully, making %s read-write\n", devname);
@@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	}
 	if (verbose >= 0)
 		pr_err("added %s\n", dv->devname);
+	map_unlock(&map);
 	return 1;
+unlock:
+	map_unlock(&map);
+	return -1;
 }
 
 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
@ 2023-04-23  1:30 ` Li Xiao Keng
  2023-05-08  1:30   ` Li Xiao Keng
  2023-05-08 13:35 ` Martin Wilck
  1 sibling, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-04-23  1:30 UTC (permalink / raw)
  To: jes, mwilck, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang

ping

On 2023/4/17 22:01, Li Xiao Keng wrote:
> When we add a new disk to a raid, it may return -EBUSY.
> 
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
> 
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
> 
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
> 
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
> ---
>  Assemble.c |  5 ++++-
>  Manage.c   | 25 +++++++++++++++++--------
>  2 files changed, 21 insertions(+), 9 deletions(-)
> 
> diff --git a/Assemble.c b/Assemble.c
> index 49804941..086890ed 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1479,8 +1479,11 @@ try_again:
>  	 * to our list.  We flag them so that we don't try to re-add,
>  	 * but can remove if they turn out to not be wanted.
>  	 */
> -	if (map_lock(&map))
> +	if (map_lock(&map)) {
>  		pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
> +		return 1;
> +	}
> +
>  	if (c->update == UOPT_UUID)
>  		mp = NULL;
>  	else
> diff --git a/Manage.c b/Manage.c
> index f54de7c6..6a101bae 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	struct supertype *dev_st;
>  	int j;
>  	mdu_disk_info_t disc;
> +	struct map_ent *map = NULL;
>  
>  	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>  		if (dv->disposition == 'M')
> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  		disc.raid_disk = 0;
>  	}
>  
> +	if (map_lock(&map)) {
> +		pr_err("failed to get exclusive lock on mapfile when add disk\n");
> +		return -1;
> +	}
>  	if (array->not_persistent==0) {
>  		int dfd;
>  		if (dv->disposition == 'j')
> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>  		if (tst->ss->add_to_super(tst, &disc, dfd,
>  					  dv->devname, INVALID_SECTORS))
> -			return -1;
> +			goto unlock;
>  		if (tst->ss->write_init_super(tst))
> -			return -1;
> +			goto unlock;
>  	} else if (dv->disposition == 'A') {
>  		/*  this had better be raid1.
>  		 * As we are "--re-add"ing we must find a spare slot
> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			pr_err("add failed for %s: could not get exclusive access to container\n",
>  			       dv->devname);
>  			tst->ss->free_super(tst);
> -			return -1;
> +			goto unlock;
>  		}
>  
>  		/* Check if metadata handler is able to accept the drive */
>  		if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>  		    0, 0, dv->devname, NULL, 0, 1)) {
>  			close(container_fd);
> -			return -1;
> +			goto unlock;
>  		}
>  
>  		Kill(dv->devname, NULL, 0, -1, 0);
> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  					  dv->devname, INVALID_SECTORS)) {
>  			close(dfd);
>  			close(container_fd);
> -			return -1;
> +			goto unlock;
>  		}
>  		if (!mdmon_running(tst->container_devnm))
>  			tst->ss->sync_metadata(tst);
> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			       dv->devname);
>  			close(container_fd);
>  			tst->ss->free_super(tst);
> -			return -1;
> +			goto unlock;
>  		}
>  		sra->array.level = LEVEL_CONTAINER;
>  		/* Need to set data_offset and component_size */
> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			pr_err("add new device to external metadata failed for %s\n", dv->devname);
>  			close(container_fd);
>  			sysfs_free(sra);
> -			return -1;
> +			goto unlock;
>  		}
>  		ping_monitor(devnm);
>  		sysfs_free(sra);
> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  			else
>  				pr_err("add new device failed for %s as %d: %s\n",
>  				       dv->devname, j, strerror(errno));
> -			return -1;
> +			goto unlock;
>  		}
>  		if (dv->disposition == 'j') {
>  			pr_err("Journal added successfully, making %s read-write\n", devname);
> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>  	}
>  	if (verbose >= 0)
>  		pr_err("added %s\n", dv->devname);
> +	map_unlock(&map);
>  	return 1;
> +unlock:
> +	map_unlock(&map);
> +	return -1;
>  }
>  
>  int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-04-23  1:30 ` Li Xiao Keng
@ 2023-05-08  1:30   ` Li Xiao Keng
  2023-05-08  1:40     ` Coly Li
  0 siblings, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-08  1:30 UTC (permalink / raw)
  To: jes, mwilck, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang

ping

On 2023/4/23 9:30, Li Xiao Keng wrote:
> ping
> 
> On 2023/4/17 22:01, Li Xiao Keng wrote:
>> When we add a new disk to a raid, it may return -EBUSY.
>>
>> The main process of --add:
>> 1. dev_open
>> 2. store_super1(st, di->fd) in write_init_super1
>> 3. fsync(di->fd) in write_init_super1
>> 4. close(di->fd)
>> 5. ioctl(ADD_NEW_DISK)
>>
>> However, there will be some udev(change) event after step4. Then
>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>> will be add to md device. After that, ioctl will return -EBUSY.
>>
>> Here we add map_lock before write_init_super in "mdadm --add"
>> to fix this race.
>>
>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>> ---
>>  Assemble.c |  5 ++++-
>>  Manage.c   | 25 +++++++++++++++++--------
>>  2 files changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/Assemble.c b/Assemble.c
>> index 49804941..086890ed 100644
>> --- a/Assemble.c
>> +++ b/Assemble.c
>> @@ -1479,8 +1479,11 @@ try_again:
>>  	 * to our list.  We flag them so that we don't try to re-add,
>>  	 * but can remove if they turn out to not be wanted.
>>  	 */
>> -	if (map_lock(&map))
>> +	if (map_lock(&map)) {
>>  		pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>> +		return 1;
>> +	}
>> +
>>  	if (c->update == UOPT_UUID)
>>  		mp = NULL;
>>  	else
>> diff --git a/Manage.c b/Manage.c
>> index f54de7c6..6a101bae 100644
>> --- a/Manage.c
>> +++ b/Manage.c
>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  	struct supertype *dev_st;
>>  	int j;
>>  	mdu_disk_info_t disc;
>> +	struct map_ent *map = NULL;
>>  
>>  	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>>  		if (dv->disposition == 'M')
>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  		disc.raid_disk = 0;
>>  	}
>>  
>> +	if (map_lock(&map)) {
>> +		pr_err("failed to get exclusive lock on mapfile when add disk\n");
>> +		return -1;
>> +	}
>>  	if (array->not_persistent==0) {
>>  		int dfd;
>>  		if (dv->disposition == 'j')
>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>>  		if (tst->ss->add_to_super(tst, &disc, dfd,
>>  					  dv->devname, INVALID_SECTORS))
>> -			return -1;
>> +			goto unlock;
>>  		if (tst->ss->write_init_super(tst))
>> -			return -1;
>> +			goto unlock;
>>  	} else if (dv->disposition == 'A') {
>>  		/*  this had better be raid1.
>>  		 * As we are "--re-add"ing we must find a spare slot
>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  			pr_err("add failed for %s: could not get exclusive access to container\n",
>>  			       dv->devname);
>>  			tst->ss->free_super(tst);
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  
>>  		/* Check if metadata handler is able to accept the drive */
>>  		if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>>  		    0, 0, dv->devname, NULL, 0, 1)) {
>>  			close(container_fd);
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  
>>  		Kill(dv->devname, NULL, 0, -1, 0);
>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  					  dv->devname, INVALID_SECTORS)) {
>>  			close(dfd);
>>  			close(container_fd);
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  		if (!mdmon_running(tst->container_devnm))
>>  			tst->ss->sync_metadata(tst);
>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  			       dv->devname);
>>  			close(container_fd);
>>  			tst->ss->free_super(tst);
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  		sra->array.level = LEVEL_CONTAINER;
>>  		/* Need to set data_offset and component_size */
>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  			pr_err("add new device to external metadata failed for %s\n", dv->devname);
>>  			close(container_fd);
>>  			sysfs_free(sra);
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  		ping_monitor(devnm);
>>  		sysfs_free(sra);
>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  			else
>>  				pr_err("add new device failed for %s as %d: %s\n",
>>  				       dv->devname, j, strerror(errno));
>> -			return -1;
>> +			goto unlock;
>>  		}
>>  		if (dv->disposition == 'j') {
>>  			pr_err("Journal added successfully, making %s read-write\n", devname);
>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>  	}
>>  	if (verbose >= 0)
>>  		pr_err("added %s\n", dv->devname);
>> +	map_unlock(&map);
>>  	return 1;
>> +unlock:
>> +	map_unlock(&map);
>> +	return -1;
>>  }
>>  
>>  int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-05-08  1:30   ` Li Xiao Keng
@ 2023-05-08  1:40     ` Coly Li
  2023-05-11 14:28       ` Li Xiao Keng
  0 siblings, 1 reply; 13+ messages in thread
From: Coly Li @ 2023-05-08  1:40 UTC (permalink / raw)
  To: Li Xiao Keng; +Cc: jes, mwilck, pmenzel, linux-raid, miaoguanqin, louhongxiang



> 2023年5月8日 09:30,Li Xiao Keng <lixiaokeng@huawei.com> 写道:
> 
> ping
> 
> On 2023/4/23 9:30, Li Xiao Keng wrote:
>> ping
>> 
>> On 2023/4/17 22:01, Li Xiao Keng wrote:
>>> When we add a new disk to a raid, it may return -EBUSY.
>>> 
>>> The main process of --add:
>>> 1. dev_open
>>> 2. store_super1(st, di->fd) in write_init_super1
>>> 3. fsync(di->fd) in write_init_super1
>>> 4. close(di->fd)
>>> 5. ioctl(ADD_NEW_DISK)
>>> 
>>> However, there will be some udev(change) event after step4. Then
>>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>>> will be add to md device. After that, ioctl will return -EBUSY.
>>> 

Hi Xiao Keng,

The above description of the race is not informative enough, I am aware exactly how the race is from, therefore I am not able to response for the fix.

Coly Li




>>> Here we add map_lock before write_init_super in "mdadm --add"
>>> to fix this race.
>>> 
>>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>>> ---
>>> Assemble.c |  5 ++++-
>>> Manage.c   | 25 +++++++++++++++++--------
>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>> 
>>> diff --git a/Assemble.c b/Assemble.c
>>> index 49804941..086890ed 100644
>>> --- a/Assemble.c
>>> +++ b/Assemble.c
>>> @@ -1479,8 +1479,11 @@ try_again:
>>> * to our list.  We flag them so that we don't try to re-add,
>>> * but can remove if they turn out to not be wanted.
>>> */
>>> - if (map_lock(&map))
>>> + if (map_lock(&map)) {
>>> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>>> + return 1;
>>> + }
>>> +
>>> if (c->update == UOPT_UUID)
>>> mp = NULL;
>>> else
>>> diff --git a/Manage.c b/Manage.c
>>> index f54de7c6..6a101bae 100644
>>> --- a/Manage.c
>>> +++ b/Manage.c
>>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> struct supertype *dev_st;
>>> int j;
>>> mdu_disk_info_t disc;
>>> + struct map_ent *map = NULL;
>>> 
>>> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>>> if (dv->disposition == 'M')
>>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> disc.raid_disk = 0;
>>> }
>>> 
>>> + if (map_lock(&map)) {
>>> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
>>> + return -1;
>>> + }
>>> if (array->not_persistent==0) {
>>> int dfd;
>>> if (dv->disposition == 'j')
>>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>>> if (tst->ss->add_to_super(tst, &disc, dfd,
>>>  dv->devname, INVALID_SECTORS))
>>> - return -1;
>>> + goto unlock;
>>> if (tst->ss->write_init_super(tst))
>>> - return -1;
>>> + goto unlock;
>>> } else if (dv->disposition == 'A') {
>>> /*  this had better be raid1.
>>> * As we are "--re-add"ing we must find a spare slot
>>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> pr_err("add failed for %s: could not get exclusive access to container\n",
>>>       dv->devname);
>>> tst->ss->free_super(tst);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> 
>>> /* Check if metadata handler is able to accept the drive */
>>> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>>>    0, 0, dv->devname, NULL, 0, 1)) {
>>> close(container_fd);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> 
>>> Kill(dv->devname, NULL, 0, -1, 0);
>>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>  dv->devname, INVALID_SECTORS)) {
>>> close(dfd);
>>> close(container_fd);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> if (!mdmon_running(tst->container_devnm))
>>> tst->ss->sync_metadata(tst);
>>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>       dv->devname);
>>> close(container_fd);
>>> tst->ss->free_super(tst);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> sra->array.level = LEVEL_CONTAINER;
>>> /* Need to set data_offset and component_size */
>>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> pr_err("add new device to external metadata failed for %s\n", dv->devname);
>>> close(container_fd);
>>> sysfs_free(sra);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> ping_monitor(devnm);
>>> sysfs_free(sra);
>>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> else
>>> pr_err("add new device failed for %s as %d: %s\n",
>>>       dv->devname, j, strerror(errno));
>>> - return -1;
>>> + goto unlock;
>>> }
>>> if (dv->disposition == 'j') {
>>> pr_err("Journal added successfully, making %s read-write\n", devname);
>>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> }
>>> if (verbose >= 0)
>>> pr_err("added %s\n", dv->devname);
>>> + map_unlock(&map);
>>> return 1;
>>> +unlock:
>>> + map_unlock(&map);
>>> + return -1;
>>> }
>>> 
>>> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>> 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
  2023-04-23  1:30 ` Li Xiao Keng
@ 2023-05-08 13:35 ` Martin Wilck
  2023-05-11 14:37   ` Li Xiao Keng
  1 sibling, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2023-05-08 13:35 UTC (permalink / raw)
  To: Li Xiao Keng, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang

Hello Li Xiao Keng,

On Mon, 2023-04-17 at 22:01 +0800, Li Xiao Keng wrote:
> When we add a new disk to a raid, it may return -EBUSY.
> 
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
> 
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
> 
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
> 
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>

I don't feel familiar enough with the mdadm code to write an
authoritative review. In particular, I don't fully understand the 
intended semantics of map_lock(); thus while I believe the way you 
are using this lock is correct, I can't assert this with certainty.
Jes, Coly, or someone else should double-check that.

Anyway, I'll try to get the communication on this issue going again.

I think you should expand some more on your commit description. It
makes sense for people who followed the previous discussion, but it
should be self-explanatory also for people reading the commit 5y from
now.

Other than that, I only have one nitpick (see below).

Regards
Martin

> ---
>  Assemble.c |  5 ++++-
>  Manage.c   | 25 +++++++++++++++++--------
>  2 files changed, 21 insertions(+), 9 deletions(-)
> 
> diff --git a/Assemble.c b/Assemble.c
> index 49804941..086890ed 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1479,8 +1479,11 @@ try_again:
>          * to our list.  We flag them so that we don't try to re-add,
>          * but can remove if they turn out to not be wanted.
>          */
> -       if (map_lock(&map))
> +       if (map_lock(&map)) {
>                 pr_err("failed to get exclusive lock on mapfile -
> continue anyway...\n");

As you added a "return 1" here, the "continue anyway" message is wrong.
You need to change it.

> +               return 1;
> +       }
> +
>         if (c->update == UOPT_UUID)
>                 mp = NULL;
>         else
> diff --git a/Manage.c b/Manage.c
> index f54de7c6..6a101bae 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>         struct supertype *dev_st;
>         int j;
>         mdu_disk_info_t disc;
> +       struct map_ent *map = NULL;
>  
>         if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>                 if (dv->disposition == 'M')
> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                 disc.raid_disk = 0;
>         }
>  
> +       if (map_lock(&map)) {
> +               pr_err("failed to get exclusive lock on mapfile when
> add disk\n");
> +               return -1;
> +       }
>         if (array->not_persistent==0) {
>                 int dfd;
>                 if (dv->disposition == 'j')
> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                 dfd = dev_open(dv->devname, O_RDWR |
> O_EXCL|O_DIRECT);
>                 if (tst->ss->add_to_super(tst, &disc, dfd,
>                                           dv->devname,
> INVALID_SECTORS))
> -                       return -1;
> +                       goto unlock;
>                 if (tst->ss->write_init_super(tst))
> -                       return -1;
> +                       goto unlock;
>         } else if (dv->disposition == 'A') {
>                 /*  this had better be raid1.
>                  * As we are "--re-add"ing we must find a spare slot
> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         pr_err("add failed for %s: could not get
> exclusive access to container\n",
>                                dv->devname);
>                         tst->ss->free_super(tst);
> -                       return -1;
> +                       goto unlock;
>                 }
>  
>                 /* Check if metadata handler is able to accept the
> drive */
>                 if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER,
> 0, 1, NULL,
>                     0, 0, dv->devname, NULL, 0, 1)) {
>                         close(container_fd);
> -                       return -1;
> +                       goto unlock;
>                 }
>  
>                 Kill(dv->devname, NULL, 0, -1, 0);
> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                                           dv->devname,
> INVALID_SECTORS)) {
>                         close(dfd);
>                         close(container_fd);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 if (!mdmon_running(tst->container_devnm))
>                         tst->ss->sync_metadata(tst);
> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
>                                dv->devname);
>                         close(container_fd);
>                         tst->ss->free_super(tst);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 sra->array.level = LEVEL_CONTAINER;
>                 /* Need to set data_offset and component_size */
> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         pr_err("add new device to external metadata
> failed for %s\n", dv->devname);
>                         close(container_fd);
>                         sysfs_free(sra);
> -                       return -1;
> +                       goto unlock;
>                 }
>                 ping_monitor(devnm);
>                 sysfs_free(sra);
> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>                         else
>                                 pr_err("add new device failed for %s
> as %d: %s\n",
>                                        dv->devname, j,
> strerror(errno));
> -                       return -1;
> +                       goto unlock;
>                 }
>                 if (dv->disposition == 'j') {
>                         pr_err("Journal added successfully, making %s
> read-write\n", devname);
> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
>         }
>         if (verbose >= 0)
>                 pr_err("added %s\n", dv->devname);
> +       map_unlock(&map);
>         return 1;
> +unlock:
> +       map_unlock(&map);
> +       return -1;
>  }
>  
>  int Manage_remove(struct supertype *tst, int fd, struct mddev_dev
> *dv,


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-05-08  1:40     ` Coly Li
@ 2023-05-11 14:28       ` Li Xiao Keng
  0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-11 14:28 UTC (permalink / raw)
  To: Coly Li; +Cc: jes, mwilck, pmenzel, linux-raid, miaoguanqin, louhongxiang



On 2023/5/8 9:40, Coly Li wrote:
> 
> 
>> 2023年5月8日 09:30,Li Xiao Keng <lixiaokeng@huawei.com> 写道:
>>
>> ping
>>
>> On 2023/4/23 9:30, Li Xiao Keng wrote:
>>> ping
>>>
>>> On 2023/4/17 22:01, Li Xiao Keng wrote:
>>>> When we add a new disk to a raid, it may return -EBUSY.
>>>>
>>>> The main process of --add:
>>>> 1. dev_open
>>>> 2. store_super1(st, di->fd) in write_init_super1
>>>> 3. fsync(di->fd) in write_init_super1
>>>> 4. close(di->fd)
>>>> 5. ioctl(ADD_NEW_DISK)
>>>>
>>>> However, there will be some udev(change) event after step4. Then
>>>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>>>> will be add to md device. After that, ioctl will return -EBUSY.
>>>>
> 
> Hi Xiao Keng,
> 
> The above description of the race is not informative enough, I am aware exactly how the race is from, therefore I am not able to response for the fix.
> 
> Coly Li
> 
------
There is a raid1 with sda and sdb. And we add sdc to this raid,
it may return -EBUSY.

The main process of --add:
1. dev_open(sdc) in Manage_add
2. store_super1(st, di->fd) in write_init_super1
3. fsync(fd) in store_super1
4. close(di->fd) in write_init_super1
5. ioctl(ADD_NEW_DISK)

Step 2 and 3 will add sdc to metadata of raid1. There will be
udev(change of sdc) event after step4. Then "/usr/sbin/mdadm
--incremental --export $devnode --offroot $env{DEVLINKS}"
will be run, and the sdc will be added to the raid1. Then
step 5 will return -EBUSY because it checks if device isn't
claimed in md_import_device()->lock_rdev()->blkdev_get_by_dev()
->blkdev_get().

It will be confusing for users because sdc is added first time.
The "incremental" will get map_lock before add sdc to raid1.
So we add map_lock before write_init_super in "mdadm --add"
to fix the race of "add" and "incremental".
------

I'm sorry for no detailed description. Do you have any advice
of the new description?

> 
> 
> 
>>>> Here we add map_lock before write_init_super in "mdadm --add"
>>>> to fix this race.
>>>>
>>>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>>>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>>>> ---
>>>> Assemble.c |  5 ++++-
>>>> Manage.c   | 25 +++++++++++++++++--------
>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/Assemble.c b/Assemble.c
>>>> index 49804941..086890ed 100644
>>>> --- a/Assemble.c
>>>> +++ b/Assemble.c
>>>> @@ -1479,8 +1479,11 @@ try_again:
>>>> * to our list.  We flag them so that we don't try to re-add,
>>>> * but can remove if they turn out to not be wanted.
>>>> */
>>>> - if (map_lock(&map))
>>>> + if (map_lock(&map)) {
>>>> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>>>> + return 1;
>>>> + }
>>>> +
>>>> if (c->update == UOPT_UUID)
>>>> mp = NULL;
>>>> else
>>>> diff --git a/Manage.c b/Manage.c
>>>> index f54de7c6..6a101bae 100644
>>>> --- a/Manage.c
>>>> +++ b/Manage.c
>>>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> struct supertype *dev_st;
>>>> int j;
>>>> mdu_disk_info_t disc;
>>>> + struct map_ent *map = NULL;
>>>>
>>>> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>>>> if (dv->disposition == 'M')
>>>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> disc.raid_disk = 0;
>>>> }
>>>>
>>>> + if (map_lock(&map)) {
>>>> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
>>>> + return -1;
>>>> + }
>>>> if (array->not_persistent==0) {
>>>> int dfd;
>>>> if (dv->disposition == 'j')
>>>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>>>> if (tst->ss->add_to_super(tst, &disc, dfd,
>>>>  dv->devname, INVALID_SECTORS))
>>>> - return -1;
>>>> + goto unlock;
>>>> if (tst->ss->write_init_super(tst))
>>>> - return -1;
>>>> + goto unlock;
>>>> } else if (dv->disposition == 'A') {
>>>> /*  this had better be raid1.
>>>> * As we are "--re-add"ing we must find a spare slot
>>>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> pr_err("add failed for %s: could not get exclusive access to container\n",
>>>>       dv->devname);
>>>> tst->ss->free_super(tst);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>>
>>>> /* Check if metadata handler is able to accept the drive */
>>>> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>>>>    0, 0, dv->devname, NULL, 0, 1)) {
>>>> close(container_fd);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>>
>>>> Kill(dv->devname, NULL, 0, -1, 0);
>>>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>>  dv->devname, INVALID_SECTORS)) {
>>>> close(dfd);
>>>> close(container_fd);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> if (!mdmon_running(tst->container_devnm))
>>>> tst->ss->sync_metadata(tst);
>>>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>>       dv->devname);
>>>> close(container_fd);
>>>> tst->ss->free_super(tst);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> sra->array.level = LEVEL_CONTAINER;
>>>> /* Need to set data_offset and component_size */
>>>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> pr_err("add new device to external metadata failed for %s\n", dv->devname);
>>>> close(container_fd);
>>>> sysfs_free(sra);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> ping_monitor(devnm);
>>>> sysfs_free(sra);
>>>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> else
>>>> pr_err("add new device failed for %s as %d: %s\n",
>>>>       dv->devname, j, strerror(errno));
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> if (dv->disposition == 'j') {
>>>> pr_err("Journal added successfully, making %s read-write\n", devname);
>>>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> }
>>>> if (verbose >= 0)
>>>> pr_err("added %s\n", dv->devname);
>>>> + map_unlock(&map);
>>>> return 1;
>>>> +unlock:
>>>> + map_unlock(&map);
>>>> + return -1;
>>>> }
>>>>
>>>> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>>>
> 
> .
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-05-08 13:35 ` Martin Wilck
@ 2023-05-11 14:37   ` Li Xiao Keng
  2023-09-01 16:11     ` Jes Sorensen
  0 siblings, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-11 14:37 UTC (permalink / raw)
  To: Martin Wilck, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang



On 2023/5/8 21:35, Martin Wilck wrote:
>> -       if (map_lock(&map))
>> +       if (map_lock(&map)) {
>>                 pr_err("failed to get exclusive lock on mapfile -
>> continue anyway...\n");
> As you added a "return 1" here, the "continue anyway" message is wrong.
> You need to change it.
> 

After resolving doubts of Coly Li, I will change it to

pr_err("failed to get exclusive lock on mapfile when assemble raid.")

Regards,
Li Xiao Keng

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-05-11 14:37   ` Li Xiao Keng
@ 2023-09-01 16:11     ` Jes Sorensen
  2023-09-05  7:42       ` Li Xiao Keng
  0 siblings, 1 reply; 13+ messages in thread
From: Jes Sorensen @ 2023-09-01 16:11 UTC (permalink / raw)
  To: Li Xiao Keng, Martin Wilck, pmenzel, colyli, linux-raid
  Cc: miaoguanqin, louhongxiang

On 5/11/23 10:37, Li Xiao Keng wrote:
> 
> 
> On 2023/5/8 21:35, Martin Wilck wrote:
>>> -       if (map_lock(&map))
>>> +       if (map_lock(&map)) {
>>>                 pr_err("failed to get exclusive lock on mapfile -
>>> continue anyway...\n");
>> As you added a "return 1" here, the "continue anyway" message is wrong.
>> You need to change it.
>>
> 
> After resolving doubts of Coly Li, I will change it to
> 
> pr_err("failed to get exclusive lock on mapfile when assemble raid.")
> 
> Regards,
> Li Xiao Keng

Hi Li,

Did you ever post an updated version?

Thanks,
Jes



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-09-01 16:11     ` Jes Sorensen
@ 2023-09-05  7:42       ` Li Xiao Keng
  0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-09-05  7:42 UTC (permalink / raw)
  To: Jes Sorensen, Martin Wilck, pmenzel, colyli, linux-raid
  Cc: miaoguanqin, louhongxiang



On 2023/9/2 0:11, Jes Sorensen wrote:
> On 5/11/23 10:37, Li Xiao Keng wrote:
>>
>>
>> On 2023/5/8 21:35, Martin Wilck wrote:
>>>> -       if (map_lock(&map))
>>>> +       if (map_lock(&map)) {
>>>>                 pr_err("failed to get exclusive lock on mapfile -
>>>> continue anyway...\n");
>>> As you added a "return 1" here, the "continue anyway" message is wrong.
>>> You need to change it.
>>>
>>
>> After resolving doubts of Coly Li, I will change it to
>>
>> pr_err("failed to get exclusive lock on mapfile when assemble raid.")
>>
>> Regards,
>> Li Xiao Keng
> 
> Hi Li,
> 
> Did you ever post an updated version?

Coly Li did not reply me. If there is not other question, I will update and send the patch.
> 
> Thanks,
> Jes
> 
> 
> 
> .
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-03-21  9:21 ` Martin Wilck
@ 2023-03-21 12:28   ` Li Xiao Keng
  0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-03-21 12:28 UTC (permalink / raw)
  To: Martin Wilck, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang



On 2023/3/21 17:21, Martin Wilck wrote:
> On Tue, 2023-03-21 at 16:55 +0800, Li Xiaokeng wrote:
>> From: lixiaokeng <lixiaokeng@huawei.com>
>>
>> When we add a new disk to a raid, it may return -EBUSY.
>>
>> The main process of --add:
>> 1. dev_open
>> 2. store_super1(st, di->fd) in write_init_super1
>> 3. fsync(di->fd) in write_init_super1
>> 4. close(di->fd)
>> 5. ioctl(ADD_NEW_DISK)
>>
>> However, there will be some udev(change) event after step4. Then
>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>> will be add to md device. After that, ioctl will return -EBUSY.
>>
>> Here we add map_lock before write_init_super in "mdadm --add"
>> to fix this race.
>>
>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
> 
> As noted in the previous thread about the topic, this will only help
> if "mdadm -I" quits when it can't lock the device. Or am I overlooking
> something?
> 

  I am sorry for missing previous email before sending patch.

  At first, I had similar doubts. But I test it, "Incremental" will
be blocked if "Add" uses map_lock. Only when open and flock return
error, map_lock() returns -1. It is hardly to happen. In commit
"ad5bc697a", the return value of map_lock() is not checked. So
I don't change the codes of the original map_lock(). If it's better
to return error when map_lock fails, I will change my patch.

Regards,
Li Xiao Keng

> Martin
> 
> .
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-03-21  8:55 Li Xiaokeng
  2023-03-21  9:21 ` Martin Wilck
@ 2023-03-21 10:09 ` Paul Menzel
  1 sibling, 0 replies; 13+ messages in thread
From: Paul Menzel @ 2023-03-21 10:09 UTC (permalink / raw)
  To: Li Xiaokeng; +Cc: jes, mwilck, colyli, linux-raid, miaoguanqin, louhongxiang

Dear Li,


Am 21.03.23 um 09:55 schrieb Li Xiaokeng:
> From: lixiaokeng <lixiaokeng@huawei.com>

Just a note, that your name is not correctly configured on the system 
you created the patch on.

[…]


Kind regards,

Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
  2023-03-21  8:55 Li Xiaokeng
@ 2023-03-21  9:21 ` Martin Wilck
  2023-03-21 12:28   ` Li Xiao Keng
  2023-03-21 10:09 ` Paul Menzel
  1 sibling, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2023-03-21  9:21 UTC (permalink / raw)
  To: Li Xiaokeng, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang

On Tue, 2023-03-21 at 16:55 +0800, Li Xiaokeng wrote:
> From: lixiaokeng <lixiaokeng@huawei.com>
> 
> When we add a new disk to a raid, it may return -EBUSY.
> 
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
> 
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
> 
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
> 
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>

As noted in the previous thread about the topic, this will only help
if "mdadm -I" quits when it can't lock the device. Or am I overlooking
something?

Martin


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
@ 2023-03-21  8:55 Li Xiaokeng
  2023-03-21  9:21 ` Martin Wilck
  2023-03-21 10:09 ` Paul Menzel
  0 siblings, 2 replies; 13+ messages in thread
From: Li Xiaokeng @ 2023-03-21  8:55 UTC (permalink / raw)
  To: jes, mwilck, pmenzel, colyli, linux-raid
  Cc: miaoguanqin, louhongxiang, lixiaokeng

From: lixiaokeng <lixiaokeng@huawei.com>

When we add a new disk to a raid, it may return -EBUSY.

The main process of --add:
1. dev_open
2. store_super1(st, di->fd) in write_init_super1
3. fsync(di->fd) in write_init_super1
4. close(di->fd)
5. ioctl(ADD_NEW_DISK)

However, there will be some udev(change) event after step4. Then
"/usr/sbin/mdadm --incremental ..." will be run, and the new disk
will be add to md device. After that, ioctl will return -EBUSY.

Here we add map_lock before write_init_super in "mdadm --add"
to fix this race.

Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
---
 Manage.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/Manage.c b/Manage.c
index fde6aba3..893a2ea8 100644
--- a/Manage.c
+++ b/Manage.c
@@ -720,6 +720,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	struct supertype *dev_st;
 	int j;
 	mdu_disk_info_t disc;
+	struct map_ent *map = NULL;
 
 	if (!get_dev_size(tfd, dv->devname, &ldsize)) {
 		if (dv->disposition == 'M')
@@ -917,6 +918,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 		disc.raid_disk = 0;
 	}
 
+	map_lock(&map);
 	if (array->not_persistent==0) {
 		int dfd;
 		if (dv->disposition == 'j')
@@ -928,9 +930,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 		dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
 		if (tst->ss->add_to_super(tst, &disc, dfd,
 					  dv->devname, INVALID_SECTORS))
-			return -1;
+			goto unlock;
 		if (tst->ss->write_init_super(tst))
-			return -1;
+			goto unlock;
 	} else if (dv->disposition == 'A') {
 		/*  this had better be raid1.
 		 * As we are "--re-add"ing we must find a spare slot
@@ -988,14 +990,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			pr_err("add failed for %s: could not get exclusive access to container\n",
 			       dv->devname);
 			tst->ss->free_super(tst);
-			return -1;
+			goto unlock;
 		}
 
 		/* Check if metadata handler is able to accept the drive */
 		if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
 		    0, 0, dv->devname, NULL, 0, 1)) {
 			close(container_fd);
-			return -1;
+			goto unlock;
 		}
 
 		Kill(dv->devname, NULL, 0, -1, 0);
@@ -1004,7 +1006,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 					  dv->devname, INVALID_SECTORS)) {
 			close(dfd);
 			close(container_fd);
-			return -1;
+			goto unlock;
 		}
 		if (!mdmon_running(tst->container_devnm))
 			tst->ss->sync_metadata(tst);
@@ -1015,7 +1017,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			       dv->devname);
 			close(container_fd);
 			tst->ss->free_super(tst);
-			return -1;
+			goto unlock;
 		}
 		sra->array.level = LEVEL_CONTAINER;
 		/* Need to set data_offset and component_size */
@@ -1030,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			pr_err("add new device to external metadata failed for %s\n", dv->devname);
 			close(container_fd);
 			sysfs_free(sra);
-			return -1;
+			goto unlock;
 		}
 		ping_monitor(devnm);
 		sysfs_free(sra);
@@ -1044,7 +1046,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 			else
 				pr_err("add new device failed for %s as %d: %s\n",
 				       dv->devname, j, strerror(errno));
-			return -1;
+			goto unlock;
 		}
 		if (dv->disposition == 'j') {
 			pr_err("Journal added successfully, making %s read-write\n", devname);
@@ -1055,7 +1057,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
 	}
 	if (verbose >= 0)
 		pr_err("added %s\n", dv->devname);
+	map_unlock(&map);
 	return 1;
+unlock:
+	map_unlock(&map);
+	return -1;
 }
 
 int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-09-05 16:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
2023-04-23  1:30 ` Li Xiao Keng
2023-05-08  1:30   ` Li Xiao Keng
2023-05-08  1:40     ` Coly Li
2023-05-11 14:28       ` Li Xiao Keng
2023-05-08 13:35 ` Martin Wilck
2023-05-11 14:37   ` Li Xiao Keng
2023-09-01 16:11     ` Jes Sorensen
2023-09-05  7:42       ` Li Xiao Keng
  -- strict thread matches above, loose matches on Subject: below --
2023-03-21  8:55 Li Xiaokeng
2023-03-21  9:21 ` Martin Wilck
2023-03-21 12:28   ` Li Xiao Keng
2023-03-21 10:09 ` Paul Menzel

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.