* [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
@ 2023-04-17 14:01 Li Xiao Keng
2023-04-23 1:30 ` Li Xiao Keng
2023-05-08 13:35 ` Martin Wilck
0 siblings, 2 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-04-17 14:01 UTC (permalink / raw)
To: jes, mwilck, pmenzel, colyli, linux-raid
Cc: miaoguanqin, louhongxiang, Li Xiao Keng
When we add a new disk to a raid, it may return -EBUSY.
The main process of --add:
1. dev_open
2. store_super1(st, di->fd) in write_init_super1
3. fsync(di->fd) in write_init_super1
4. close(di->fd)
5. ioctl(ADD_NEW_DISK)
However, there will be some udev(change) event after step4. Then
"/usr/sbin/mdadm --incremental ..." will be run, and the new disk
will be add to md device. After that, ioctl will return -EBUSY.
Here we add map_lock before write_init_super in "mdadm --add"
to fix this race.
Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
---
Assemble.c | 5 ++++-
Manage.c | 25 +++++++++++++++++--------
2 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/Assemble.c b/Assemble.c
index 49804941..086890ed 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1479,8 +1479,11 @@ try_again:
* to our list. We flag them so that we don't try to re-add,
* but can remove if they turn out to not be wanted.
*/
- if (map_lock(&map))
+ if (map_lock(&map)) {
pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
+ return 1;
+ }
+
if (c->update == UOPT_UUID)
mp = NULL;
else
diff --git a/Manage.c b/Manage.c
index f54de7c6..6a101bae 100644
--- a/Manage.c
+++ b/Manage.c
@@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *dev_st;
int j;
mdu_disk_info_t disc;
+ struct map_ent *map = NULL;
if (!get_dev_size(tfd, dv->devname, &ldsize)) {
if (dv->disposition == 'M')
@@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
disc.raid_disk = 0;
}
+ if (map_lock(&map)) {
+ pr_err("failed to get exclusive lock on mapfile when add disk\n");
+ return -1;
+ }
if (array->not_persistent==0) {
int dfd;
if (dv->disposition == 'j')
@@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
if (tst->ss->add_to_super(tst, &disc, dfd,
dv->devname, INVALID_SECTORS))
- return -1;
+ goto unlock;
if (tst->ss->write_init_super(tst))
- return -1;
+ goto unlock;
} else if (dv->disposition == 'A') {
/* this had better be raid1.
* As we are "--re-add"ing we must find a spare slot
@@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
pr_err("add failed for %s: could not get exclusive access to container\n",
dv->devname);
tst->ss->free_super(tst);
- return -1;
+ goto unlock;
}
/* Check if metadata handler is able to accept the drive */
if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
0, 0, dv->devname, NULL, 0, 1)) {
close(container_fd);
- return -1;
+ goto unlock;
}
Kill(dv->devname, NULL, 0, -1, 0);
@@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dv->devname, INVALID_SECTORS)) {
close(dfd);
close(container_fd);
- return -1;
+ goto unlock;
}
if (!mdmon_running(tst->container_devnm))
tst->ss->sync_metadata(tst);
@@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dv->devname);
close(container_fd);
tst->ss->free_super(tst);
- return -1;
+ goto unlock;
}
sra->array.level = LEVEL_CONTAINER;
/* Need to set data_offset and component_size */
@@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
pr_err("add new device to external metadata failed for %s\n", dv->devname);
close(container_fd);
sysfs_free(sra);
- return -1;
+ goto unlock;
}
ping_monitor(devnm);
sysfs_free(sra);
@@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
else
pr_err("add new device failed for %s as %d: %s\n",
dv->devname, j, strerror(errno));
- return -1;
+ goto unlock;
}
if (dv->disposition == 'j') {
pr_err("Journal added successfully, making %s read-write\n", devname);
@@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
if (verbose >= 0)
pr_err("added %s\n", dv->devname);
+ map_unlock(&map);
return 1;
+unlock:
+ map_unlock(&map);
+ return -1;
}
int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
--
2.33.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
@ 2023-04-23 1:30 ` Li Xiao Keng
2023-05-08 1:30 ` Li Xiao Keng
2023-05-08 13:35 ` Martin Wilck
1 sibling, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-04-23 1:30 UTC (permalink / raw)
To: jes, mwilck, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
ping
On 2023/4/17 22:01, Li Xiao Keng wrote:
> When we add a new disk to a raid, it may return -EBUSY.
>
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
>
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
>
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
>
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
> ---
> Assemble.c | 5 ++++-
> Manage.c | 25 +++++++++++++++++--------
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/Assemble.c b/Assemble.c
> index 49804941..086890ed 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1479,8 +1479,11 @@ try_again:
> * to our list. We flag them so that we don't try to re-add,
> * but can remove if they turn out to not be wanted.
> */
> - if (map_lock(&map))
> + if (map_lock(&map)) {
> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
> + return 1;
> + }
> +
> if (c->update == UOPT_UUID)
> mp = NULL;
> else
> diff --git a/Manage.c b/Manage.c
> index f54de7c6..6a101bae 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> struct supertype *dev_st;
> int j;
> mdu_disk_info_t disc;
> + struct map_ent *map = NULL;
>
> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
> if (dv->disposition == 'M')
> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> disc.raid_disk = 0;
> }
>
> + if (map_lock(&map)) {
> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
> + return -1;
> + }
> if (array->not_persistent==0) {
> int dfd;
> if (dv->disposition == 'j')
> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
> if (tst->ss->add_to_super(tst, &disc, dfd,
> dv->devname, INVALID_SECTORS))
> - return -1;
> + goto unlock;
> if (tst->ss->write_init_super(tst))
> - return -1;
> + goto unlock;
> } else if (dv->disposition == 'A') {
> /* this had better be raid1.
> * As we are "--re-add"ing we must find a spare slot
> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> pr_err("add failed for %s: could not get exclusive access to container\n",
> dv->devname);
> tst->ss->free_super(tst);
> - return -1;
> + goto unlock;
> }
>
> /* Check if metadata handler is able to accept the drive */
> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
> 0, 0, dv->devname, NULL, 0, 1)) {
> close(container_fd);
> - return -1;
> + goto unlock;
> }
>
> Kill(dv->devname, NULL, 0, -1, 0);
> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> dv->devname, INVALID_SECTORS)) {
> close(dfd);
> close(container_fd);
> - return -1;
> + goto unlock;
> }
> if (!mdmon_running(tst->container_devnm))
> tst->ss->sync_metadata(tst);
> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> dv->devname);
> close(container_fd);
> tst->ss->free_super(tst);
> - return -1;
> + goto unlock;
> }
> sra->array.level = LEVEL_CONTAINER;
> /* Need to set data_offset and component_size */
> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> pr_err("add new device to external metadata failed for %s\n", dv->devname);
> close(container_fd);
> sysfs_free(sra);
> - return -1;
> + goto unlock;
> }
> ping_monitor(devnm);
> sysfs_free(sra);
> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> else
> pr_err("add new device failed for %s as %d: %s\n",
> dv->devname, j, strerror(errno));
> - return -1;
> + goto unlock;
> }
> if (dv->disposition == 'j') {
> pr_err("Journal added successfully, making %s read-write\n", devname);
> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
> }
> if (verbose >= 0)
> pr_err("added %s\n", dv->devname);
> + map_unlock(&map);
> return 1;
> +unlock:
> + map_unlock(&map);
> + return -1;
> }
>
> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-04-23 1:30 ` Li Xiao Keng
@ 2023-05-08 1:30 ` Li Xiao Keng
2023-05-08 1:40 ` Coly Li
0 siblings, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-08 1:30 UTC (permalink / raw)
To: jes, mwilck, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
ping
On 2023/4/23 9:30, Li Xiao Keng wrote:
> ping
>
> On 2023/4/17 22:01, Li Xiao Keng wrote:
>> When we add a new disk to a raid, it may return -EBUSY.
>>
>> The main process of --add:
>> 1. dev_open
>> 2. store_super1(st, di->fd) in write_init_super1
>> 3. fsync(di->fd) in write_init_super1
>> 4. close(di->fd)
>> 5. ioctl(ADD_NEW_DISK)
>>
>> However, there will be some udev(change) event after step4. Then
>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>> will be add to md device. After that, ioctl will return -EBUSY.
>>
>> Here we add map_lock before write_init_super in "mdadm --add"
>> to fix this race.
>>
>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>> ---
>> Assemble.c | 5 ++++-
>> Manage.c | 25 +++++++++++++++++--------
>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>
>> diff --git a/Assemble.c b/Assemble.c
>> index 49804941..086890ed 100644
>> --- a/Assemble.c
>> +++ b/Assemble.c
>> @@ -1479,8 +1479,11 @@ try_again:
>> * to our list. We flag them so that we don't try to re-add,
>> * but can remove if they turn out to not be wanted.
>> */
>> - if (map_lock(&map))
>> + if (map_lock(&map)) {
>> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>> + return 1;
>> + }
>> +
>> if (c->update == UOPT_UUID)
>> mp = NULL;
>> else
>> diff --git a/Manage.c b/Manage.c
>> index f54de7c6..6a101bae 100644
>> --- a/Manage.c
>> +++ b/Manage.c
>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> struct supertype *dev_st;
>> int j;
>> mdu_disk_info_t disc;
>> + struct map_ent *map = NULL;
>>
>> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>> if (dv->disposition == 'M')
>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> disc.raid_disk = 0;
>> }
>>
>> + if (map_lock(&map)) {
>> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
>> + return -1;
>> + }
>> if (array->not_persistent==0) {
>> int dfd;
>> if (dv->disposition == 'j')
>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>> if (tst->ss->add_to_super(tst, &disc, dfd,
>> dv->devname, INVALID_SECTORS))
>> - return -1;
>> + goto unlock;
>> if (tst->ss->write_init_super(tst))
>> - return -1;
>> + goto unlock;
>> } else if (dv->disposition == 'A') {
>> /* this had better be raid1.
>> * As we are "--re-add"ing we must find a spare slot
>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> pr_err("add failed for %s: could not get exclusive access to container\n",
>> dv->devname);
>> tst->ss->free_super(tst);
>> - return -1;
>> + goto unlock;
>> }
>>
>> /* Check if metadata handler is able to accept the drive */
>> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>> 0, 0, dv->devname, NULL, 0, 1)) {
>> close(container_fd);
>> - return -1;
>> + goto unlock;
>> }
>>
>> Kill(dv->devname, NULL, 0, -1, 0);
>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> dv->devname, INVALID_SECTORS)) {
>> close(dfd);
>> close(container_fd);
>> - return -1;
>> + goto unlock;
>> }
>> if (!mdmon_running(tst->container_devnm))
>> tst->ss->sync_metadata(tst);
>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> dv->devname);
>> close(container_fd);
>> tst->ss->free_super(tst);
>> - return -1;
>> + goto unlock;
>> }
>> sra->array.level = LEVEL_CONTAINER;
>> /* Need to set data_offset and component_size */
>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> pr_err("add new device to external metadata failed for %s\n", dv->devname);
>> close(container_fd);
>> sysfs_free(sra);
>> - return -1;
>> + goto unlock;
>> }
>> ping_monitor(devnm);
>> sysfs_free(sra);
>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> else
>> pr_err("add new device failed for %s as %d: %s\n",
>> dv->devname, j, strerror(errno));
>> - return -1;
>> + goto unlock;
>> }
>> if (dv->disposition == 'j') {
>> pr_err("Journal added successfully, making %s read-write\n", devname);
>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>> }
>> if (verbose >= 0)
>> pr_err("added %s\n", dv->devname);
>> + map_unlock(&map);
>> return 1;
>> +unlock:
>> + map_unlock(&map);
>> + return -1;
>> }
>>
>> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-05-08 1:30 ` Li Xiao Keng
@ 2023-05-08 1:40 ` Coly Li
2023-05-11 14:28 ` Li Xiao Keng
0 siblings, 1 reply; 13+ messages in thread
From: Coly Li @ 2023-05-08 1:40 UTC (permalink / raw)
To: Li Xiao Keng; +Cc: jes, mwilck, pmenzel, linux-raid, miaoguanqin, louhongxiang
> 2023年5月8日 09:30,Li Xiao Keng <lixiaokeng@huawei.com> 写道:
>
> ping
>
> On 2023/4/23 9:30, Li Xiao Keng wrote:
>> ping
>>
>> On 2023/4/17 22:01, Li Xiao Keng wrote:
>>> When we add a new disk to a raid, it may return -EBUSY.
>>>
>>> The main process of --add:
>>> 1. dev_open
>>> 2. store_super1(st, di->fd) in write_init_super1
>>> 3. fsync(di->fd) in write_init_super1
>>> 4. close(di->fd)
>>> 5. ioctl(ADD_NEW_DISK)
>>>
>>> However, there will be some udev(change) event after step4. Then
>>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>>> will be add to md device. After that, ioctl will return -EBUSY.
>>>
Hi Xiao Keng,
The above description of the race is not informative enough, I am aware exactly how the race is from, therefore I am not able to response for the fix.
Coly Li
>>> Here we add map_lock before write_init_super in "mdadm --add"
>>> to fix this race.
>>>
>>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>>> ---
>>> Assemble.c | 5 ++++-
>>> Manage.c | 25 +++++++++++++++++--------
>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/Assemble.c b/Assemble.c
>>> index 49804941..086890ed 100644
>>> --- a/Assemble.c
>>> +++ b/Assemble.c
>>> @@ -1479,8 +1479,11 @@ try_again:
>>> * to our list. We flag them so that we don't try to re-add,
>>> * but can remove if they turn out to not be wanted.
>>> */
>>> - if (map_lock(&map))
>>> + if (map_lock(&map)) {
>>> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>>> + return 1;
>>> + }
>>> +
>>> if (c->update == UOPT_UUID)
>>> mp = NULL;
>>> else
>>> diff --git a/Manage.c b/Manage.c
>>> index f54de7c6..6a101bae 100644
>>> --- a/Manage.c
>>> +++ b/Manage.c
>>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> struct supertype *dev_st;
>>> int j;
>>> mdu_disk_info_t disc;
>>> + struct map_ent *map = NULL;
>>>
>>> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>>> if (dv->disposition == 'M')
>>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> disc.raid_disk = 0;
>>> }
>>>
>>> + if (map_lock(&map)) {
>>> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
>>> + return -1;
>>> + }
>>> if (array->not_persistent==0) {
>>> int dfd;
>>> if (dv->disposition == 'j')
>>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>>> if (tst->ss->add_to_super(tst, &disc, dfd,
>>> dv->devname, INVALID_SECTORS))
>>> - return -1;
>>> + goto unlock;
>>> if (tst->ss->write_init_super(tst))
>>> - return -1;
>>> + goto unlock;
>>> } else if (dv->disposition == 'A') {
>>> /* this had better be raid1.
>>> * As we are "--re-add"ing we must find a spare slot
>>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> pr_err("add failed for %s: could not get exclusive access to container\n",
>>> dv->devname);
>>> tst->ss->free_super(tst);
>>> - return -1;
>>> + goto unlock;
>>> }
>>>
>>> /* Check if metadata handler is able to accept the drive */
>>> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>>> 0, 0, dv->devname, NULL, 0, 1)) {
>>> close(container_fd);
>>> - return -1;
>>> + goto unlock;
>>> }
>>>
>>> Kill(dv->devname, NULL, 0, -1, 0);
>>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> dv->devname, INVALID_SECTORS)) {
>>> close(dfd);
>>> close(container_fd);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> if (!mdmon_running(tst->container_devnm))
>>> tst->ss->sync_metadata(tst);
>>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> dv->devname);
>>> close(container_fd);
>>> tst->ss->free_super(tst);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> sra->array.level = LEVEL_CONTAINER;
>>> /* Need to set data_offset and component_size */
>>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> pr_err("add new device to external metadata failed for %s\n", dv->devname);
>>> close(container_fd);
>>> sysfs_free(sra);
>>> - return -1;
>>> + goto unlock;
>>> }
>>> ping_monitor(devnm);
>>> sysfs_free(sra);
>>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> else
>>> pr_err("add new device failed for %s as %d: %s\n",
>>> dv->devname, j, strerror(errno));
>>> - return -1;
>>> + goto unlock;
>>> }
>>> if (dv->disposition == 'j') {
>>> pr_err("Journal added successfully, making %s read-write\n", devname);
>>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>> }
>>> if (verbose >= 0)
>>> pr_err("added %s\n", dv->devname);
>>> + map_unlock(&map);
>>> return 1;
>>> +unlock:
>>> + map_unlock(&map);
>>> + return -1;
>>> }
>>>
>>> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
2023-04-23 1:30 ` Li Xiao Keng
@ 2023-05-08 13:35 ` Martin Wilck
2023-05-11 14:37 ` Li Xiao Keng
1 sibling, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2023-05-08 13:35 UTC (permalink / raw)
To: Li Xiao Keng, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
Hello Li Xiao Keng,
On Mon, 2023-04-17 at 22:01 +0800, Li Xiao Keng wrote:
> When we add a new disk to a raid, it may return -EBUSY.
>
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
>
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
>
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
>
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
I don't feel familiar enough with the mdadm code to write an
authoritative review. In particular, I don't fully understand the
intended semantics of map_lock(); thus while I believe the way you
are using this lock is correct, I can't assert this with certainty.
Jes, Coly, or someone else should double-check that.
Anyway, I'll try to get the communication on this issue going again.
I think you should expand some more on your commit description. It
makes sense for people who followed the previous discussion, but it
should be self-explanatory also for people reading the commit 5y from
now.
Other than that, I only have one nitpick (see below).
Regards
Martin
> ---
> Assemble.c | 5 ++++-
> Manage.c | 25 +++++++++++++++++--------
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/Assemble.c b/Assemble.c
> index 49804941..086890ed 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1479,8 +1479,11 @@ try_again:
> * to our list. We flag them so that we don't try to re-add,
> * but can remove if they turn out to not be wanted.
> */
> - if (map_lock(&map))
> + if (map_lock(&map)) {
> pr_err("failed to get exclusive lock on mapfile -
> continue anyway...\n");
As you added a "return 1" here, the "continue anyway" message is wrong.
You need to change it.
> + return 1;
> + }
> +
> if (c->update == UOPT_UUID)
> mp = NULL;
> else
> diff --git a/Manage.c b/Manage.c
> index f54de7c6..6a101bae 100644
> --- a/Manage.c
> +++ b/Manage.c
> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
> struct supertype *dev_st;
> int j;
> mdu_disk_info_t disc;
> + struct map_ent *map = NULL;
>
> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
> if (dv->disposition == 'M')
> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
> disc.raid_disk = 0;
> }
>
> + if (map_lock(&map)) {
> + pr_err("failed to get exclusive lock on mapfile when
> add disk\n");
> + return -1;
> + }
> if (array->not_persistent==0) {
> int dfd;
> if (dv->disposition == 'j')
> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
> dfd = dev_open(dv->devname, O_RDWR |
> O_EXCL|O_DIRECT);
> if (tst->ss->add_to_super(tst, &disc, dfd,
> dv->devname,
> INVALID_SECTORS))
> - return -1;
> + goto unlock;
> if (tst->ss->write_init_super(tst))
> - return -1;
> + goto unlock;
> } else if (dv->disposition == 'A') {
> /* this had better be raid1.
> * As we are "--re-add"ing we must find a spare slot
> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
> pr_err("add failed for %s: could not get
> exclusive access to container\n",
> dv->devname);
> tst->ss->free_super(tst);
> - return -1;
> + goto unlock;
> }
>
> /* Check if metadata handler is able to accept the
> drive */
> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER,
> 0, 1, NULL,
> 0, 0, dv->devname, NULL, 0, 1)) {
> close(container_fd);
> - return -1;
> + goto unlock;
> }
>
> Kill(dv->devname, NULL, 0, -1, 0);
> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
> dv->devname,
> INVALID_SECTORS)) {
> close(dfd);
> close(container_fd);
> - return -1;
> + goto unlock;
> }
> if (!mdmon_running(tst->container_devnm))
> tst->ss->sync_metadata(tst);
> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev
> *dv,
> dv->devname);
> close(container_fd);
> tst->ss->free_super(tst);
> - return -1;
> + goto unlock;
> }
> sra->array.level = LEVEL_CONTAINER;
> /* Need to set data_offset and component_size */
> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
> pr_err("add new device to external metadata
> failed for %s\n", dv->devname);
> close(container_fd);
> sysfs_free(sra);
> - return -1;
> + goto unlock;
> }
> ping_monitor(devnm);
> sysfs_free(sra);
> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
> else
> pr_err("add new device failed for %s
> as %d: %s\n",
> dv->devname, j,
> strerror(errno));
> - return -1;
> + goto unlock;
> }
> if (dv->disposition == 'j') {
> pr_err("Journal added successfully, making %s
> read-write\n", devname);
> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct
> mddev_dev *dv,
> }
> if (verbose >= 0)
> pr_err("added %s\n", dv->devname);
> + map_unlock(&map);
> return 1;
> +unlock:
> + map_unlock(&map);
> + return -1;
> }
>
> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev
> *dv,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-05-08 1:40 ` Coly Li
@ 2023-05-11 14:28 ` Li Xiao Keng
0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-11 14:28 UTC (permalink / raw)
To: Coly Li; +Cc: jes, mwilck, pmenzel, linux-raid, miaoguanqin, louhongxiang
On 2023/5/8 9:40, Coly Li wrote:
>
>
>> 2023年5月8日 09:30,Li Xiao Keng <lixiaokeng@huawei.com> 写道:
>>
>> ping
>>
>> On 2023/4/23 9:30, Li Xiao Keng wrote:
>>> ping
>>>
>>> On 2023/4/17 22:01, Li Xiao Keng wrote:
>>>> When we add a new disk to a raid, it may return -EBUSY.
>>>>
>>>> The main process of --add:
>>>> 1. dev_open
>>>> 2. store_super1(st, di->fd) in write_init_super1
>>>> 3. fsync(di->fd) in write_init_super1
>>>> 4. close(di->fd)
>>>> 5. ioctl(ADD_NEW_DISK)
>>>>
>>>> However, there will be some udev(change) event after step4. Then
>>>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>>>> will be add to md device. After that, ioctl will return -EBUSY.
>>>>
>
> Hi Xiao Keng,
>
> The above description of the race is not informative enough, I am aware exactly how the race is from, therefore I am not able to response for the fix.
>
> Coly Li
>
------
There is a raid1 with sda and sdb. And we add sdc to this raid,
it may return -EBUSY.
The main process of --add:
1. dev_open(sdc) in Manage_add
2. store_super1(st, di->fd) in write_init_super1
3. fsync(fd) in store_super1
4. close(di->fd) in write_init_super1
5. ioctl(ADD_NEW_DISK)
Step 2 and 3 will add sdc to metadata of raid1. There will be
udev(change of sdc) event after step4. Then "/usr/sbin/mdadm
--incremental --export $devnode --offroot $env{DEVLINKS}"
will be run, and the sdc will be added to the raid1. Then
step 5 will return -EBUSY because it checks if device isn't
claimed in md_import_device()->lock_rdev()->blkdev_get_by_dev()
->blkdev_get().
It will be confusing for users because sdc is added first time.
The "incremental" will get map_lock before add sdc to raid1.
So we add map_lock before write_init_super in "mdadm --add"
to fix the race of "add" and "incremental".
------
I'm sorry for no detailed description. Do you have any advice
of the new description?
>
>
>
>>>> Here we add map_lock before write_init_super in "mdadm --add"
>>>> to fix this race.
>>>>
>>>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>>>> Signed-off-by: Guanqin Miao <miaoguanqin@huawei.com>
>>>> ---
>>>> Assemble.c | 5 ++++-
>>>> Manage.c | 25 +++++++++++++++++--------
>>>> 2 files changed, 21 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/Assemble.c b/Assemble.c
>>>> index 49804941..086890ed 100644
>>>> --- a/Assemble.c
>>>> +++ b/Assemble.c
>>>> @@ -1479,8 +1479,11 @@ try_again:
>>>> * to our list. We flag them so that we don't try to re-add,
>>>> * but can remove if they turn out to not be wanted.
>>>> */
>>>> - if (map_lock(&map))
>>>> + if (map_lock(&map)) {
>>>> pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
>>>> + return 1;
>>>> + }
>>>> +
>>>> if (c->update == UOPT_UUID)
>>>> mp = NULL;
>>>> else
>>>> diff --git a/Manage.c b/Manage.c
>>>> index f54de7c6..6a101bae 100644
>>>> --- a/Manage.c
>>>> +++ b/Manage.c
>>>> @@ -703,6 +703,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> struct supertype *dev_st;
>>>> int j;
>>>> mdu_disk_info_t disc;
>>>> + struct map_ent *map = NULL;
>>>>
>>>> if (!get_dev_size(tfd, dv->devname, &ldsize)) {
>>>> if (dv->disposition == 'M')
>>>> @@ -900,6 +901,10 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> disc.raid_disk = 0;
>>>> }
>>>>
>>>> + if (map_lock(&map)) {
>>>> + pr_err("failed to get exclusive lock on mapfile when add disk\n");
>>>> + return -1;
>>>> + }
>>>> if (array->not_persistent==0) {
>>>> int dfd;
>>>> if (dv->disposition == 'j')
>>>> @@ -911,9 +916,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
>>>> if (tst->ss->add_to_super(tst, &disc, dfd,
>>>> dv->devname, INVALID_SECTORS))
>>>> - return -1;
>>>> + goto unlock;
>>>> if (tst->ss->write_init_super(tst))
>>>> - return -1;
>>>> + goto unlock;
>>>> } else if (dv->disposition == 'A') {
>>>> /* this had better be raid1.
>>>> * As we are "--re-add"ing we must find a spare slot
>>>> @@ -971,14 +976,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> pr_err("add failed for %s: could not get exclusive access to container\n",
>>>> dv->devname);
>>>> tst->ss->free_super(tst);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>>
>>>> /* Check if metadata handler is able to accept the drive */
>>>> if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
>>>> 0, 0, dv->devname, NULL, 0, 1)) {
>>>> close(container_fd);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>>
>>>> Kill(dv->devname, NULL, 0, -1, 0);
>>>> @@ -987,7 +992,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> dv->devname, INVALID_SECTORS)) {
>>>> close(dfd);
>>>> close(container_fd);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> if (!mdmon_running(tst->container_devnm))
>>>> tst->ss->sync_metadata(tst);
>>>> @@ -998,7 +1003,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> dv->devname);
>>>> close(container_fd);
>>>> tst->ss->free_super(tst);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> sra->array.level = LEVEL_CONTAINER;
>>>> /* Need to set data_offset and component_size */
>>>> @@ -1013,7 +1018,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> pr_err("add new device to external metadata failed for %s\n", dv->devname);
>>>> close(container_fd);
>>>> sysfs_free(sra);
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> ping_monitor(devnm);
>>>> sysfs_free(sra);
>>>> @@ -1027,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> else
>>>> pr_err("add new device failed for %s as %d: %s\n",
>>>> dv->devname, j, strerror(errno));
>>>> - return -1;
>>>> + goto unlock;
>>>> }
>>>> if (dv->disposition == 'j') {
>>>> pr_err("Journal added successfully, making %s read-write\n", devname);
>>>> @@ -1038,7 +1043,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
>>>> }
>>>> if (verbose >= 0)
>>>> pr_err("added %s\n", dv->devname);
>>>> + map_unlock(&map);
>>>> return 1;
>>>> +unlock:
>>>> + map_unlock(&map);
>>>> + return -1;
>>>> }
>>>>
>>>> int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
>>>>
>
> .
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-05-08 13:35 ` Martin Wilck
@ 2023-05-11 14:37 ` Li Xiao Keng
2023-09-01 16:11 ` Jes Sorensen
0 siblings, 1 reply; 13+ messages in thread
From: Li Xiao Keng @ 2023-05-11 14:37 UTC (permalink / raw)
To: Martin Wilck, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
On 2023/5/8 21:35, Martin Wilck wrote:
>> - if (map_lock(&map))
>> + if (map_lock(&map)) {
>> pr_err("failed to get exclusive lock on mapfile -
>> continue anyway...\n");
> As you added a "return 1" here, the "continue anyway" message is wrong.
> You need to change it.
>
After resolving doubts of Coly Li, I will change it to
pr_err("failed to get exclusive lock on mapfile when assemble raid.")
Regards,
Li Xiao Keng
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-05-11 14:37 ` Li Xiao Keng
@ 2023-09-01 16:11 ` Jes Sorensen
2023-09-05 7:42 ` Li Xiao Keng
0 siblings, 1 reply; 13+ messages in thread
From: Jes Sorensen @ 2023-09-01 16:11 UTC (permalink / raw)
To: Li Xiao Keng, Martin Wilck, pmenzel, colyli, linux-raid
Cc: miaoguanqin, louhongxiang
On 5/11/23 10:37, Li Xiao Keng wrote:
>
>
> On 2023/5/8 21:35, Martin Wilck wrote:
>>> - if (map_lock(&map))
>>> + if (map_lock(&map)) {
>>> pr_err("failed to get exclusive lock on mapfile -
>>> continue anyway...\n");
>> As you added a "return 1" here, the "continue anyway" message is wrong.
>> You need to change it.
>>
>
> After resolving doubts of Coly Li, I will change it to
>
> pr_err("failed to get exclusive lock on mapfile when assemble raid.")
>
> Regards,
> Li Xiao Keng
Hi Li,
Did you ever post an updated version?
Thanks,
Jes
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-09-01 16:11 ` Jes Sorensen
@ 2023-09-05 7:42 ` Li Xiao Keng
0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-09-05 7:42 UTC (permalink / raw)
To: Jes Sorensen, Martin Wilck, pmenzel, colyli, linux-raid
Cc: miaoguanqin, louhongxiang
On 2023/9/2 0:11, Jes Sorensen wrote:
> On 5/11/23 10:37, Li Xiao Keng wrote:
>>
>>
>> On 2023/5/8 21:35, Martin Wilck wrote:
>>>> - if (map_lock(&map))
>>>> + if (map_lock(&map)) {
>>>> pr_err("failed to get exclusive lock on mapfile -
>>>> continue anyway...\n");
>>> As you added a "return 1" here, the "continue anyway" message is wrong.
>>> You need to change it.
>>>
>>
>> After resolving doubts of Coly Li, I will change it to
>>
>> pr_err("failed to get exclusive lock on mapfile when assemble raid.")
>>
>> Regards,
>> Li Xiao Keng
>
> Hi Li,
>
> Did you ever post an updated version?
Coly Li did not reply me. If there is not other question, I will update and send the patch.
>
> Thanks,
> Jes
>
>
>
> .
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-03-21 9:21 ` Martin Wilck
@ 2023-03-21 12:28 ` Li Xiao Keng
0 siblings, 0 replies; 13+ messages in thread
From: Li Xiao Keng @ 2023-03-21 12:28 UTC (permalink / raw)
To: Martin Wilck, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
On 2023/3/21 17:21, Martin Wilck wrote:
> On Tue, 2023-03-21 at 16:55 +0800, Li Xiaokeng wrote:
>> From: lixiaokeng <lixiaokeng@huawei.com>
>>
>> When we add a new disk to a raid, it may return -EBUSY.
>>
>> The main process of --add:
>> 1. dev_open
>> 2. store_super1(st, di->fd) in write_init_super1
>> 3. fsync(di->fd) in write_init_super1
>> 4. close(di->fd)
>> 5. ioctl(ADD_NEW_DISK)
>>
>> However, there will be some udev(change) event after step4. Then
>> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
>> will be add to md device. After that, ioctl will return -EBUSY.
>>
>> Here we add map_lock before write_init_super in "mdadm --add"
>> to fix this race.
>>
>> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
>
> As noted in the previous thread about the topic, this will only help
> if "mdadm -I" quits when it can't lock the device. Or am I overlooking
> something?
>
I am sorry for missing previous email before sending patch.
At first, I had similar doubts. But I test it, "Incremental" will
be blocked if "Add" uses map_lock. Only when open and flock return
error, map_lock() returns -1. It is hardly to happen. In commit
"ad5bc697a", the return value of map_lock() is not checked. So
I don't change the codes of the original map_lock(). If it's better
to return error when map_lock fails, I will change my patch.
Regards,
Li Xiao Keng
> Martin
>
> .
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-03-21 8:55 Li Xiaokeng
2023-03-21 9:21 ` Martin Wilck
@ 2023-03-21 10:09 ` Paul Menzel
1 sibling, 0 replies; 13+ messages in thread
From: Paul Menzel @ 2023-03-21 10:09 UTC (permalink / raw)
To: Li Xiaokeng; +Cc: jes, mwilck, colyli, linux-raid, miaoguanqin, louhongxiang
Dear Li,
Am 21.03.23 um 09:55 schrieb Li Xiaokeng:
> From: lixiaokeng <lixiaokeng@huawei.com>
Just a note, that your name is not correctly configured on the system
you created the patch on.
[…]
Kind regards,
Paul
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
2023-03-21 8:55 Li Xiaokeng
@ 2023-03-21 9:21 ` Martin Wilck
2023-03-21 12:28 ` Li Xiao Keng
2023-03-21 10:09 ` Paul Menzel
1 sibling, 1 reply; 13+ messages in thread
From: Martin Wilck @ 2023-03-21 9:21 UTC (permalink / raw)
To: Li Xiaokeng, jes, pmenzel, colyli, linux-raid; +Cc: miaoguanqin, louhongxiang
On Tue, 2023-03-21 at 16:55 +0800, Li Xiaokeng wrote:
> From: lixiaokeng <lixiaokeng@huawei.com>
>
> When we add a new disk to a raid, it may return -EBUSY.
>
> The main process of --add:
> 1. dev_open
> 2. store_super1(st, di->fd) in write_init_super1
> 3. fsync(di->fd) in write_init_super1
> 4. close(di->fd)
> 5. ioctl(ADD_NEW_DISK)
>
> However, there will be some udev(change) event after step4. Then
> "/usr/sbin/mdadm --incremental ..." will be run, and the new disk
> will be add to md device. After that, ioctl will return -EBUSY.
>
> Here we add map_lock before write_init_super in "mdadm --add"
> to fix this race.
>
> Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
As noted in the previous thread about the topic, this will only help
if "mdadm -I" quits when it can't lock the device. Or am I overlooking
something?
Martin
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH] Fix race of "mdadm --add" and "mdadm --incremental"
@ 2023-03-21 8:55 Li Xiaokeng
2023-03-21 9:21 ` Martin Wilck
2023-03-21 10:09 ` Paul Menzel
0 siblings, 2 replies; 13+ messages in thread
From: Li Xiaokeng @ 2023-03-21 8:55 UTC (permalink / raw)
To: jes, mwilck, pmenzel, colyli, linux-raid
Cc: miaoguanqin, louhongxiang, lixiaokeng
From: lixiaokeng <lixiaokeng@huawei.com>
When we add a new disk to a raid, it may return -EBUSY.
The main process of --add:
1. dev_open
2. store_super1(st, di->fd) in write_init_super1
3. fsync(di->fd) in write_init_super1
4. close(di->fd)
5. ioctl(ADD_NEW_DISK)
However, there will be some udev(change) event after step4. Then
"/usr/sbin/mdadm --incremental ..." will be run, and the new disk
will be add to md device. After that, ioctl will return -EBUSY.
Here we add map_lock before write_init_super in "mdadm --add"
to fix this race.
Signed-off-by: Li Xiao Keng <lixiaokeng@huawei.com>
---
Manage.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/Manage.c b/Manage.c
index fde6aba3..893a2ea8 100644
--- a/Manage.c
+++ b/Manage.c
@@ -720,6 +720,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
struct supertype *dev_st;
int j;
mdu_disk_info_t disc;
+ struct map_ent *map = NULL;
if (!get_dev_size(tfd, dv->devname, &ldsize)) {
if (dv->disposition == 'M')
@@ -917,6 +918,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
disc.raid_disk = 0;
}
+ map_lock(&map);
if (array->not_persistent==0) {
int dfd;
if (dv->disposition == 'j')
@@ -928,9 +930,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT);
if (tst->ss->add_to_super(tst, &disc, dfd,
dv->devname, INVALID_SECTORS))
- return -1;
+ goto unlock;
if (tst->ss->write_init_super(tst))
- return -1;
+ goto unlock;
} else if (dv->disposition == 'A') {
/* this had better be raid1.
* As we are "--re-add"ing we must find a spare slot
@@ -988,14 +990,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
pr_err("add failed for %s: could not get exclusive access to container\n",
dv->devname);
tst->ss->free_super(tst);
- return -1;
+ goto unlock;
}
/* Check if metadata handler is able to accept the drive */
if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL,
0, 0, dv->devname, NULL, 0, 1)) {
close(container_fd);
- return -1;
+ goto unlock;
}
Kill(dv->devname, NULL, 0, -1, 0);
@@ -1004,7 +1006,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dv->devname, INVALID_SECTORS)) {
close(dfd);
close(container_fd);
- return -1;
+ goto unlock;
}
if (!mdmon_running(tst->container_devnm))
tst->ss->sync_metadata(tst);
@@ -1015,7 +1017,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
dv->devname);
close(container_fd);
tst->ss->free_super(tst);
- return -1;
+ goto unlock;
}
sra->array.level = LEVEL_CONTAINER;
/* Need to set data_offset and component_size */
@@ -1030,7 +1032,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
pr_err("add new device to external metadata failed for %s\n", dv->devname);
close(container_fd);
sysfs_free(sra);
- return -1;
+ goto unlock;
}
ping_monitor(devnm);
sysfs_free(sra);
@@ -1044,7 +1046,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
else
pr_err("add new device failed for %s as %d: %s\n",
dv->devname, j, strerror(errno));
- return -1;
+ goto unlock;
}
if (dv->disposition == 'j') {
pr_err("Journal added successfully, making %s read-write\n", devname);
@@ -1055,7 +1057,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv,
}
if (verbose >= 0)
pr_err("added %s\n", dv->devname);
+ map_unlock(&map);
return 1;
+unlock:
+ map_unlock(&map);
+ return -1;
}
int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv,
--
2.33.0
^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2023-09-05 16:04 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-17 14:01 [PATCH] Fix race of "mdadm --add" and "mdadm --incremental" Li Xiao Keng
2023-04-23 1:30 ` Li Xiao Keng
2023-05-08 1:30 ` Li Xiao Keng
2023-05-08 1:40 ` Coly Li
2023-05-11 14:28 ` Li Xiao Keng
2023-05-08 13:35 ` Martin Wilck
2023-05-11 14:37 ` Li Xiao Keng
2023-09-01 16:11 ` Jes Sorensen
2023-09-05 7:42 ` Li Xiao Keng
-- strict thread matches above, loose matches on Subject: below --
2023-03-21 8:55 Li Xiaokeng
2023-03-21 9:21 ` Martin Wilck
2023-03-21 12:28 ` Li Xiao Keng
2023-03-21 10:09 ` Paul Menzel
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.