* Re: [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 7:42 ` [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error Anand Jain
@ 2017-03-13 9:05 ` Qu Wenruo
2017-03-13 16:21 ` Anand Jain
2017-03-14 8:26 ` [PATCH V2 " Anand Jain
` (2 subsequent siblings)
3 siblings, 1 reply; 22+ messages in thread
From: Qu Wenruo @ 2017-03-13 9:05 UTC (permalink / raw)
To: Anand Jain, linux-btrfs; +Cc: dsterba
At 03/13/2017 03:42 PM, Anand Jain wrote:
> The objective of this patch is to cleanup barrier_all_devices()
> so that the error checking is in a separate loop independent of
> of the loop which submits and waits on the device flush requests.
The idea itself is quite good, and we do need it.
>
> By doing this it helps to further develop patches which would tune
> the error-actions as needed.
>
> Here functions such as btrfs_dev_stats_dirty() couldn't be used
> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
> fs/btrfs/disk-io.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 85 insertions(+), 11 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5719e036048b..12531a5b14ff 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
> return 0;
> }
>
> +struct device_checkpoint {
> + struct list_head list;
> + struct btrfs_device *device;
> + int stat_value_checkpoint;
> +};
> +
> +static int add_device_checkpoint(struct list_head *checkpoint,
Could we have another structure instead of list_head to record device
checkpoints?
The list_head is never a meaningful structure under most cases.
> + struct btrfs_device *device)
> +{
> + struct device_checkpoint *cdev =
> + kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
> + if (!cdev)
> + return -ENOMEM;
This means that, we must check return value of add_device_checkpoint(),
while later code doesn't check it at all.
> +
> + list_add(&cdev->list, checkpoint);
And I prefer to do extra check, in case such device is already inserted
once.
> +
> + cdev->device = device;
> + cdev->stat_value_checkpoint =
> + btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
> +
> + return 0;
> +}
> +
> +static void fini_devices_checkpoint(struct list_head *checkpoint)
Never a fan of the "fini_" naming.
What about "release_"?
> +{
> + struct device_checkpoint *cdev;
> +
> + while(!list_empty(checkpoint)) {
> + cdev = list_entry(checkpoint->next,
> + struct device_checkpoint, list);
> + list_del(&cdev->list);
> + kfree(cdev);
> + }
> +}
> +
> +static int check_stat_flush(struct btrfs_device *dev,
> + struct list_head *checkpoint)
> +{
> + int val;
> + struct device_checkpoint *cdev;
> +
> + list_for_each_entry(cdev, checkpoint, list) {
> + if (cdev->device == dev) {
> + val = btrfs_dev_stat_read(dev,
> + BTRFS_DEV_STAT_FLUSH_ERRS);
> + if (cdev->stat_value_checkpoint != val)
> + return 1;
This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
by checkpoint related code.
Or any other modifier can easily break the check, causing false alert.
IIRC that's the reason why I update my previous degraded patch.
Personally speaking, I prefer the patchset to take more usage of the
checkpoint system, or it's a little overkilled for current usage.
Thanks,
Qu
> + }
> + }
> + return 0;
> +}
> +
> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
> + struct list_head *checkpoint)
> +{
> + int dropouts = 0;
> + struct btrfs_device *dev;
> +
> + list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
> + if (!dev->bdev || check_stat_flush(dev, checkpoint))
> + dropouts++;
> + }
> +
> + if (dropouts >
> + fsdevs->fs_info->num_tolerated_disk_barrier_failures)
> + return -EIO;
> +
> + return 0;
> +}
> +
> /*
> * send an empty flush down to each device in parallel,
> * then wait for them
> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
> {
> struct list_head *head;
> struct btrfs_device *dev;
> - int dropouts = 0;
> int ret;
> + struct list_head checkpoint;
> +
> + INIT_LIST_HEAD(&checkpoint);
>
> /* send down all the barriers */
> head = &info->fs_devices->devices;
> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
> if (!dev->in_fs_metadata || !dev->writeable)
> continue;
>
> + add_device_checkpoint(&checkpoint, dev);
> ret = write_dev_flush(dev, 0);
> - if (ret)
> + if (ret) {
> + fini_devices_checkpoint(&checkpoint);
> return ret;
> + }
> }
>
> /* wait for all the barriers */
> list_for_each_entry_rcu(dev, head, dev_list) {
> if (dev->missing)
> continue;
> - if (!dev->bdev) {
> - dropouts++;
> + if (!dev->bdev)
> continue;
> - }
> if (!dev->in_fs_metadata || !dev->writeable)
> continue;
>
> - ret = write_dev_flush(dev, 1);
> - if (ret)
> - dropouts++;
> + write_dev_flush(dev, 1);
> }
> - if (dropouts > info->num_tolerated_disk_barrier_failures)
> - return -EIO;
> - return 0;
> +
> + ret = check_barrier_error(info->fs_devices, &checkpoint);
> +
> + fini_devices_checkpoint(&checkpoint);
> +
> + return ret;
> }
>
> int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 9:05 ` Qu Wenruo
@ 2017-03-13 16:21 ` Anand Jain
2017-03-14 0:28 ` Qu Wenruo
0 siblings, 1 reply; 22+ messages in thread
From: Anand Jain @ 2017-03-13 16:21 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs, dsterba
Thanks for the review..
On 03/13/2017 05:05 PM, Qu Wenruo wrote:
>
>
> At 03/13/2017 03:42 PM, Anand Jain wrote:
>> The objective of this patch is to cleanup barrier_all_devices()
>> so that the error checking is in a separate loop independent of
>> of the loop which submits and waits on the device flush requests.
>
> The idea itself is quite good, and we do need it.
Thanks.
>>
>> By doing this it helps to further develop patches which would tune
>> the error-actions as needed.
>>
>> Here functions such as btrfs_dev_stats_dirty() couldn't be used
>> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
>> fs/btrfs/disk-io.c | 96
>> +++++++++++++++++++++++++++++++++++++++++++++++-------
>> 1 file changed, 85 insertions(+), 11 deletions(-)
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 5719e036048b..12531a5b14ff 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device
>> *device, int wait)
>> return 0;
>> }
>>
>> +struct device_checkpoint {
>> + struct list_head list;
>> + struct btrfs_device *device;
>> + int stat_value_checkpoint;
>> +};
>> +
>> +static int add_device_checkpoint(struct list_head *checkpoint,
>
> Could we have another structure instead of list_head to record device
> checkpoints?
> The list_head is never a meaningful structure under most cases.
I didn't understand this, there is device_checkpoint and the context
of struct list_head *checkpoint would start and end within
barrier_all_devices().
>
>> + struct btrfs_device *device)
>> +{
>> + struct device_checkpoint *cdev =
>> + kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
>> + if (!cdev)
>> + return -ENOMEM;
>
> This means that, we must check return value of add_device_checkpoint(),
> while later code doesn't check it at all.
oh. I will correct this.
>
>> +
>> + list_add(&cdev->list, checkpoint);
>
> And I prefer to do extra check, in case such device is already inserted
> once.
Hmm with the current code its not at all possible, but let me add it.
>> +
>> + cdev->device = device;
>> + cdev->stat_value_checkpoint =
>> + btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
>> +
>> + return 0;
>> +}
>> +
>> +static void fini_devices_checkpoint(struct list_head *checkpoint)
>
> Never a fan of the "fini_" naming.
> What about "release_"?
will change it.
>> +{
>> + struct device_checkpoint *cdev;
>> +
>> + while(!list_empty(checkpoint)) {
>> + cdev = list_entry(checkpoint->next,
>> + struct device_checkpoint, list);
>> + list_del(&cdev->list);
>> + kfree(cdev);
>> + }
>> +}
>> +
>> +static int check_stat_flush(struct btrfs_device *dev,
>> + struct list_head *checkpoint)
>> +{
>> + int val;
>> + struct device_checkpoint *cdev;
>> +
>> + list_for_each_entry(cdev, checkpoint, list) {
>> + if (cdev->device == dev) {
>> + val = btrfs_dev_stat_read(dev,
>> + BTRFS_DEV_STAT_FLUSH_ERRS);
>> + if (cdev->stat_value_checkpoint != val)
>> + return 1;
>
> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
> by checkpoint related code.
> Or any other modifier can easily break the check, causing false alert.
I have already checked it, its not possible with the current code,
or do you see if that is possible with the current code ? or Did I
miss something ?
> IIRC that's the reason why I update my previous degraded patch.
>
>
> Personally speaking, I prefer the patchset to take more usage of the
> checkpoint system, or it's a little overkilled for current usage.
Just want to make sure things are done in the right way.
Thanks, Anand
> Thanks,
> Qu
>
>> + }
>> + }
>> + return 0;
>> +}
>> +
>> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
>> + struct list_head *checkpoint)
>> +{
>> + int dropouts = 0;
>> + struct btrfs_device *dev;
>> +
>> + list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
>> + if (!dev->bdev || check_stat_flush(dev, checkpoint))
>> + dropouts++;
>> + }
>> +
>> + if (dropouts >
>> + fsdevs->fs_info->num_tolerated_disk_barrier_failures)
>> + return -EIO;
>> +
>> + return 0;
>> +}
>> +
>> /*
>> * send an empty flush down to each device in parallel,
>> * then wait for them
>> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct
>> btrfs_fs_info *info)
>> {
>> struct list_head *head;
>> struct btrfs_device *dev;
>> - int dropouts = 0;
>> int ret;
>> + struct list_head checkpoint;
>> +
>> + INIT_LIST_HEAD(&checkpoint);
>>
>> /* send down all the barriers */
>> head = &info->fs_devices->devices;
>> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct
>> btrfs_fs_info *info)
>> if (!dev->in_fs_metadata || !dev->writeable)
>> continue;
>>
>> + add_device_checkpoint(&checkpoint, dev);
>> ret = write_dev_flush(dev, 0);
>> - if (ret)
>> + if (ret) {
>> + fini_devices_checkpoint(&checkpoint);
>> return ret;
>> + }
>> }
>>
>> /* wait for all the barriers */
>> list_for_each_entry_rcu(dev, head, dev_list) {
>> if (dev->missing)
>> continue;
>> - if (!dev->bdev) {
>> - dropouts++;
>> + if (!dev->bdev)
>> continue;
>> - }
>> if (!dev->in_fs_metadata || !dev->writeable)
>> continue;
>>
>> - ret = write_dev_flush(dev, 1);
>> - if (ret)
>> - dropouts++;
>> + write_dev_flush(dev, 1);
>> }
>> - if (dropouts > info->num_tolerated_disk_barrier_failures)
>> - return -EIO;
>> - return 0;
>> +
>> + ret = check_barrier_error(info->fs_devices, &checkpoint);
>> +
>> + fini_devices_checkpoint(&checkpoint);
>> +
>> + return ret;
>> }
>>
>> int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 16:21 ` Anand Jain
@ 2017-03-14 0:28 ` Qu Wenruo
2017-03-14 3:36 ` Anand Jain
0 siblings, 1 reply; 22+ messages in thread
From: Qu Wenruo @ 2017-03-14 0:28 UTC (permalink / raw)
To: Anand Jain; +Cc: linux-btrfs, dsterba
At 03/14/2017 12:21 AM, Anand Jain wrote:
>
>
> Thanks for the review..
>
>
> On 03/13/2017 05:05 PM, Qu Wenruo wrote:
>>
>>
>> At 03/13/2017 03:42 PM, Anand Jain wrote:
>>> The objective of this patch is to cleanup barrier_all_devices()
>>> so that the error checking is in a separate loop independent of
>>> of the loop which submits and waits on the device flush requests.
>>
>> The idea itself is quite good, and we do need it.
>
> Thanks.
>
>>>
>>> By doing this it helps to further develop patches which would tune
>>> the error-actions as needed.
>>>
>>> Here functions such as btrfs_dev_stats_dirty() couldn't be used
>>> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>>>
>>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>>> ---
>>> fs/btrfs/disk-io.c | 96
>>> +++++++++++++++++++++++++++++++++++++++++++++++-------
>>> 1 file changed, 85 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>>> index 5719e036048b..12531a5b14ff 100644
>>> --- a/fs/btrfs/disk-io.c
>>> +++ b/fs/btrfs/disk-io.c
>>> @@ -3566,6 +3566,76 @@ static int write_dev_flush(struct btrfs_device
>>> *device, int wait)
>>> return 0;
>>> }
>>>
>>> +struct device_checkpoint {
>>> + struct list_head list;
>>> + struct btrfs_device *device;
>>> + int stat_value_checkpoint;
>>> +};
>>> +
>>> +static int add_device_checkpoint(struct list_head *checkpoint,
>>
>> Could we have another structure instead of list_head to record device
>> checkpoints?
>> The list_head is never a meaningful structure under most cases.
>
> I didn't understand this, there is device_checkpoint and the context
> of struct list_head *checkpoint would start and end within
> barrier_all_devices().
I just mean to encapsulate the list_head into another structure, e.g,
call it device_checkpoints or something else.
Just a list_head is not quite meaningful.
>
>>
>>> + struct btrfs_device *device)
>>> +{
>>> + struct device_checkpoint *cdev =
>>> + kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
>>> + if (!cdev)
>>> + return -ENOMEM;
>>
>> This means that, we must check return value of add_device_checkpoint(),
>> while later code doesn't check it at all.
>
> oh. I will correct this.
>
>>
>>> +
>>> + list_add(&cdev->list, checkpoint);
>>
>> And I prefer to do extra check, in case such device is already inserted
>> once.
>
> Hmm with the current code its not at all possible, but let me add it.
>
>>> +
>>> + cdev->device = device;
>>> + cdev->stat_value_checkpoint =
>>> + btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void fini_devices_checkpoint(struct list_head *checkpoint)
>>
>> Never a fan of the "fini_" naming.
>> What about "release_"?
>
> will change it.
>
>>> +{
>>> + struct device_checkpoint *cdev;
>>> +
>>> + while(!list_empty(checkpoint)) {
>>> + cdev = list_entry(checkpoint->next,
>>> + struct device_checkpoint, list);
>>> + list_del(&cdev->list);
>>> + kfree(cdev);
>>> + }
>>> +}
>>> +
>>> +static int check_stat_flush(struct btrfs_device *dev,
>>> + struct list_head *checkpoint)
>>> +{
>>> + int val;
>>> + struct device_checkpoint *cdev;
>>> +
>>> + list_for_each_entry(cdev, checkpoint, list) {
>>> + if (cdev->device == dev) {
>>> + val = btrfs_dev_stat_read(dev,
>>> + BTRFS_DEV_STAT_FLUSH_ERRS);
>>> + if (cdev->stat_value_checkpoint != val)
>>> + return 1;
>>
>> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
>> by checkpoint related code.
>> Or any other modifier can easily break the check, causing false alert.
>
> I have already checked it, its not possible with the current code,
> or do you see if that is possible with the current code ? or Did I
> miss something ?
You didn't miss anything.
However I just got the same comment on better not to play around
something inside btrfs_device.
So I'm not sure if it's good to do it this time just for cleaning up
barrier_all_devices().
Thanks,
Qu
>
>> IIRC that's the reason why I update my previous degraded patch.
>>
>>
>> Personally speaking, I prefer the patchset to take more usage of the
>> checkpoint system, or it's a little overkilled for current usage.
>
> Just want to make sure things are done in the right way.
>
>
> Thanks, Anand
>
>
>> Thanks,
>> Qu
>>
>>> + }
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
>>> + struct list_head *checkpoint)
>>> +{
>>> + int dropouts = 0;
>>> + struct btrfs_device *dev;
>>> +
>>> + list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
>>> + if (!dev->bdev || check_stat_flush(dev, checkpoint))
>>> + dropouts++;
>>> + }
>>> +
>>> + if (dropouts >
>>> + fsdevs->fs_info->num_tolerated_disk_barrier_failures)
>>> + return -EIO;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> /*
>>> * send an empty flush down to each device in parallel,
>>> * then wait for them
>>> @@ -3574,8 +3644,10 @@ static int barrier_all_devices(struct
>>> btrfs_fs_info *info)
>>> {
>>> struct list_head *head;
>>> struct btrfs_device *dev;
>>> - int dropouts = 0;
>>> int ret;
>>> + struct list_head checkpoint;
>>> +
>>> + INIT_LIST_HEAD(&checkpoint);
>>>
>>> /* send down all the barriers */
>>> head = &info->fs_devices->devices;
>>> @@ -3587,29 +3659,31 @@ static int barrier_all_devices(struct
>>> btrfs_fs_info *info)
>>> if (!dev->in_fs_metadata || !dev->writeable)
>>> continue;
>>>
>>> + add_device_checkpoint(&checkpoint, dev);
>>> ret = write_dev_flush(dev, 0);
>>> - if (ret)
>>> + if (ret) {
>>> + fini_devices_checkpoint(&checkpoint);
>>> return ret;
>>> + }
>>> }
>>>
>>> /* wait for all the barriers */
>>> list_for_each_entry_rcu(dev, head, dev_list) {
>>> if (dev->missing)
>>> continue;
>>> - if (!dev->bdev) {
>>> - dropouts++;
>>> + if (!dev->bdev)
>>> continue;
>>> - }
>>> if (!dev->in_fs_metadata || !dev->writeable)
>>> continue;
>>>
>>> - ret = write_dev_flush(dev, 1);
>>> - if (ret)
>>> - dropouts++;
>>> + write_dev_flush(dev, 1);
>>> }
>>> - if (dropouts > info->num_tolerated_disk_barrier_failures)
>>> - return -EIO;
>>> - return 0;
>>> +
>>> + ret = check_barrier_error(info->fs_devices, &checkpoint);
>>> +
>>> + fini_devices_checkpoint(&checkpoint);
>>> +
>>> + return ret;
>>> }
>>>
>>> int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>>>
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-14 0:28 ` Qu Wenruo
@ 2017-03-14 3:36 ` Anand Jain
0 siblings, 0 replies; 22+ messages in thread
From: Anand Jain @ 2017-03-14 3:36 UTC (permalink / raw)
To: Qu Wenruo; +Cc: linux-btrfs, dsterba
>>>>
>>>> +struct device_checkpoint {
>>>> + struct list_head list;
>>>> + struct btrfs_device *device;
>>>> + int stat_value_checkpoint;
>>>> +};
>>>> +
>>>> +static int add_device_checkpoint(struct list_head *checkpoint,
>>>
>>> Could we have another structure instead of list_head to record device
>>> checkpoints?
>>> The list_head is never a meaningful structure under most cases.
>>
>> I didn't understand this, there is device_checkpoint and the context
>> of struct list_head *checkpoint would start and end within
>> barrier_all_devices().
>
> I just mean to encapsulate the list_head into another structure, e.g,
> call it device_checkpoints or something else.
>
> Just a list_head is not quite meaningful.
OK. Will do.
>>>> +static int check_stat_flush(struct btrfs_device *dev,
>>>> + struct list_head *checkpoint)
>>>> +{
>>>> + int val;
>>>> + struct device_checkpoint *cdev;
>>>> +
>>>> + list_for_each_entry(cdev, checkpoint, list) {
>>>> + if (cdev->device == dev) {
>>>> + val = btrfs_dev_stat_read(dev,
>>>> + BTRFS_DEV_STAT_FLUSH_ERRS);
>>>> + if (cdev->stat_value_checkpoint != val)
>>>> + return 1;
>>>
>>> This check implies that BTRFS_DEV_STAT_FLUSH_ERRS will only be modified
>>> by checkpoint related code.
>>> Or any other modifier can easily break the check, causing false alert.
>>
>> I have already checked it, its not possible with the current code,
>> or do you see if that is possible with the current code ? or Did I
>> miss something ?
>
> You didn't miss anything.
>
> However I just got the same comment on better not to play around
> something inside btrfs_device.
>
> So I'm not sure if it's good to do it this time just for cleaning up
> barrier_all_devices().
Right. The reason I said that before was first of all the concept
of err_send and err_wait wasn't needed as shown in the patch set
1 to 3 in this patch-set. We have the dev_stat flush which keeps
track of the flush error in the right way.
Next to monitor the change in the flush error instead of checkpoint
probably dev_stat_ccnt is the correct way (which in the long term
we would need that kind on the per error-stat for rest of the
error-stat including flush). But unless we have all the requirements
well understood from the other pending stuff like device disappear
and reappear I won't change the struct btrfs_devices as of now.
Hope this clarifies better. Will send out the patch with the review
comment incorporated. Thanks. On top of which per chunk missing
device check patch can be added.
Thanks, Anand
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH V2 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 7:42 ` [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error Anand Jain
2017-03-13 9:05 ` Qu Wenruo
@ 2017-03-14 8:26 ` Anand Jain
2017-03-14 8:47 ` Qu Wenruo
2017-03-28 16:19 ` David Sterba
2017-03-31 11:36 ` [PATCH 4/4 V2] " Anand Jain
2017-04-05 4:07 ` [PATCH 4/4 V3] " Anand Jain
3 siblings, 2 replies; 22+ messages in thread
From: Anand Jain @ 2017-03-14 8:26 UTC (permalink / raw)
To: linux-btrfs; +Cc: dsterba, quwenruo
The objective of this patch is to cleanup barrier_all_devices()
so that the error checking is in a separate loop independent of
of the loop which submits and waits on the device flush requests.
By doing this it helps to further develop patches which would tune
the error-actions as needed.
Here functions such as btrfs_dev_stats_dirty() couldn't be used
because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2: Address Qu review comments viz..
Add meaningful names, like cp_list (for checkpoint_list head).
(And actually it does not need a new struct type just to hold
the head pointer, list node is already named as device_checkpoint).
Check return value of add_device_checkpoint()
Check if the device is already added at add_device_checkpoint()
Rename fini_devices_checkpoint() to rel_devices_checkpoint()
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5719e036048b..d0c401884643 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3566,6 +3566,86 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
return 0;
}
+struct device_checkpoint {
+ struct list_head cp_node;
+ struct btrfs_device *device;
+ int stat_value_checkpoint;
+};
+
+static int add_device_checkpoint(struct btrfs_device *device,
+ struct list_head *cp_list)
+{
+ struct device_checkpoint *cdev;
+
+ list_for_each_entry(cdev, cp_list, cp_node) {
+ if (cdev->device == device) {
+ cdev->stat_value_checkpoint =
+ btrfs_dev_stat_read(device,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
+ return 0;
+ }
+ }
+
+ cdev = kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
+ if (!cdev)
+ return -ENOMEM;
+
+ list_add(&cdev->cp_node, cp_list);
+
+ cdev->device = device;
+ cdev->stat_value_checkpoint =
+ btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
+
+ return 0;
+}
+
+static void rel_devices_checkpoint(struct list_head *cp_list)
+{
+ struct device_checkpoint *cdev;
+
+ while(!list_empty(cp_list)) {
+ cdev = list_entry(cp_list->next,
+ struct device_checkpoint, cp_node);
+ list_del(&cdev->cp_node);
+ kfree(cdev);
+ }
+}
+
+static int check_stat_flush(struct btrfs_device *dev,
+ struct list_head *cp_list)
+{
+ int val;
+ struct device_checkpoint *cdev;
+
+ list_for_each_entry(cdev, cp_list, cp_node) {
+ if (cdev->device == dev) {
+ val = btrfs_dev_stat_read(dev,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
+ if (cdev->stat_value_checkpoint != val)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
+ struct list_head *cp_list)
+{
+ int dropouts = 0;
+ struct btrfs_device *dev;
+
+ list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+ if (!dev->bdev || check_stat_flush(dev, cp_list))
+ dropouts++;
+ }
+
+ if (dropouts >
+ fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+ return -EIO;
+
+ return 0;
+}
+
/*
* send an empty flush down to each device in parallel,
* then wait for them
@@ -3574,8 +3654,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
{
struct list_head *head;
struct btrfs_device *dev;
- int dropouts = 0;
int ret;
+ static LIST_HEAD(cp_list);
/* send down all the barriers */
head = &info->fs_devices->devices;
@@ -3587,29 +3667,35 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
+ ret = add_device_checkpoint(dev, &cp_list);
+ if (ret) {
+ rel_devices_checkpoint(&cp_list);
+ return ret;
+ }
ret = write_dev_flush(dev, 0);
- if (ret)
+ if (ret) {
+ rel_devices_checkpoint(&cp_list);
return ret;
+ }
}
/* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) {
if (dev->missing)
continue;
- if (!dev->bdev) {
- dropouts++;
+ if (!dev->bdev)
continue;
- }
if (!dev->in_fs_metadata || !dev->writeable)
continue;
- ret = write_dev_flush(dev, 1);
- if (ret)
- dropouts++;
+ write_dev_flush(dev, 1);
}
- if (dropouts > info->num_tolerated_disk_barrier_failures)
- return -EIO;
- return 0;
+
+ ret = check_barrier_error(info->fs_devices, &cp_list);
+
+ rel_devices_checkpoint(&cp_list);
+
+ return ret;
}
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
--
2.10.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH V2 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-14 8:26 ` [PATCH V2 " Anand Jain
@ 2017-03-14 8:47 ` Qu Wenruo
2017-03-28 16:19 ` David Sterba
1 sibling, 0 replies; 22+ messages in thread
From: Qu Wenruo @ 2017-03-14 8:47 UTC (permalink / raw)
To: Anand Jain, linux-btrfs; +Cc: dsterba
At 03/14/2017 04:26 PM, Anand Jain wrote:
> The objective of this patch is to cleanup barrier_all_devices()
> so that the error checking is in a separate loop independent of
> of the loop which submits and waits on the device flush requests.
>
> By doing this it helps to further develop patches which would tune
> the error-actions as needed.
>
> Here functions such as btrfs_dev_stats_dirty() couldn't be used
> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
> v2: Address Qu review comments viz..
> Add meaningful names, like cp_list (for checkpoint_list head).
> (And actually it does not need a new struct type just to hold
> the head pointer, list node is already named as device_checkpoint).
> Check return value of add_device_checkpoint()
> Check if the device is already added at add_device_checkpoint()
> Rename fini_devices_checkpoint() to rel_devices_checkpoint()
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5719e036048b..d0c401884643 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3566,6 +3566,86 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
> return 0;
> }
>
> +struct device_checkpoint {
> + struct list_head cp_node;
> + struct btrfs_device *device;
> + int stat_value_checkpoint;
> +};
> +
> +static int add_device_checkpoint(struct btrfs_device *device,
> + struct list_head *cp_list)
> +{
> + struct device_checkpoint *cdev;
> +
> + list_for_each_entry(cdev, cp_list, cp_node) {
> + if (cdev->device == device) {
> + cdev->stat_value_checkpoint =
> + btrfs_dev_stat_read(device,
> + BTRFS_DEV_STAT_FLUSH_ERRS);
> + return 0;
> + }
> + }
> +
> + cdev = kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
> + if (!cdev)
> + return -ENOMEM;
> +
> + list_add(&cdev->cp_node, cp_list);
> +
> + cdev->device = device;
> + cdev->stat_value_checkpoint =
> + btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS);
> +
> + return 0;
> +}
> +
> +static void rel_devices_checkpoint(struct list_head *cp_list)
I meant "release_devices_checkpoint", 'fini' and 'rel' is too short for
to be meaningful.
Despite of that, looks not too bad to me.
Although I prefer to do extra locking to protect the operation to make
the checkpoint system more independent and robust, but it can be done later.
So far so good. Although I'm still not sure if other reviewers will have
other comments.
Thanks,
Qu
> +{
> + struct device_checkpoint *cdev;
> +
> + while(!list_empty(cp_list)) {
> + cdev = list_entry(cp_list->next,
> + struct device_checkpoint, cp_node);
> + list_del(&cdev->cp_node);
> + kfree(cdev);
> + }
> +}
> +
> +static int check_stat_flush(struct btrfs_device *dev,
> + struct list_head *cp_list)
> +{
> + int val;
> + struct device_checkpoint *cdev;
> +
> + list_for_each_entry(cdev, cp_list, cp_node) {
> + if (cdev->device == dev) {
> + val = btrfs_dev_stat_read(dev,
> + BTRFS_DEV_STAT_FLUSH_ERRS);
> + if (cdev->stat_value_checkpoint != val)
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> +static int check_barrier_error(struct btrfs_fs_devices *fsdevs,
> + struct list_head *cp_list)
> +{
> + int dropouts = 0;
> + struct btrfs_device *dev;
> +
> + list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
> + if (!dev->bdev || check_stat_flush(dev, cp_list))
> + dropouts++;
> + }
> +
> + if (dropouts >
> + fsdevs->fs_info->num_tolerated_disk_barrier_failures)
> + return -EIO;
> +
> + return 0;
> +}
> +
> /*
> * send an empty flush down to each device in parallel,
> * then wait for them
> @@ -3574,8 +3654,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
> {
> struct list_head *head;
> struct btrfs_device *dev;
> - int dropouts = 0;
> int ret;
> + static LIST_HEAD(cp_list);
>
> /* send down all the barriers */
> head = &info->fs_devices->devices;
> @@ -3587,29 +3667,35 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
> if (!dev->in_fs_metadata || !dev->writeable)
> continue;
>
> + ret = add_device_checkpoint(dev, &cp_list);
> + if (ret) {
> + rel_devices_checkpoint(&cp_list);
> + return ret;
> + }
> ret = write_dev_flush(dev, 0);
> - if (ret)
> + if (ret) {
> + rel_devices_checkpoint(&cp_list);
> return ret;
> + }
> }
>
> /* wait for all the barriers */
> list_for_each_entry_rcu(dev, head, dev_list) {
> if (dev->missing)
> continue;
> - if (!dev->bdev) {
> - dropouts++;
> + if (!dev->bdev)
> continue;
> - }
> if (!dev->in_fs_metadata || !dev->writeable)
> continue;
>
> - ret = write_dev_flush(dev, 1);
> - if (ret)
> - dropouts++;
> + write_dev_flush(dev, 1);
> }
> - if (dropouts > info->num_tolerated_disk_barrier_failures)
> - return -EIO;
> - return 0;
> +
> + ret = check_barrier_error(info->fs_devices, &cp_list);
> +
> + rel_devices_checkpoint(&cp_list);
> +
> + return ret;
> }
>
> int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V2 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-14 8:26 ` [PATCH V2 " Anand Jain
2017-03-14 8:47 ` Qu Wenruo
@ 2017-03-28 16:19 ` David Sterba
2017-03-29 10:00 ` Anand Jain
1 sibling, 1 reply; 22+ messages in thread
From: David Sterba @ 2017-03-28 16:19 UTC (permalink / raw)
To: Anand Jain; +Cc: linux-btrfs, dsterba, quwenruo
On Tue, Mar 14, 2017 at 04:26:11PM +0800, Anand Jain wrote:
> The objective of this patch is to cleanup barrier_all_devices()
> so that the error checking is in a separate loop independent of
> of the loop which submits and waits on the device flush requests.
I think that getting completely rid of the ENOMEM as suggested under
patch 2, the split would not be necessary.
The send failures will be gone so we can simply count failures from the
waiting write_dev_flush(..., 1). There any returned error also implies
the device stat increase for FLUSH_ERRS. Then barrier_all_devices will
be a bit simpler.
> By doing this it helps to further develop patches which would tune
> the error-actions as needed.
>
> Here functions such as btrfs_dev_stats_dirty() couldn't be used
> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>
> Signed-off-by: Anand Jain <anand.jain@oracle.com>
> ---
> v2: Address Qu review comments viz..
> Add meaningful names, like cp_list (for checkpoint_list head).
> (And actually it does not need a new struct type just to hold
> the head pointer, list node is already named as device_checkpoint).
> Check return value of add_device_checkpoint()
> Check if the device is already added at add_device_checkpoint()
> Rename fini_devices_checkpoint() to rel_devices_checkpoint()
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5719e036048b..d0c401884643 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3566,6 +3566,86 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
> return 0;
> }
>
> +struct device_checkpoint {
> + struct list_head cp_node;
> + struct btrfs_device *device;
> + int stat_value_checkpoint;
> +};
I find this approach particularly bad for several reasons.
You need to store just one int per device but introduce a bloated
temporary structure. If anything, the int could be embeded in
btrfs_device, set and checked at the right points. Instead, you add
memory allocations to a critical context and more potential failure
paths.
> +static int add_device_checkpoint(struct btrfs_device *device,
> + struct list_head *cp_list)
> +{
...
> + cdev = kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
...
> +}
The memory allocations are GFP_KERNEL, this could lead to strange
lockups if the allocator tries to free memory and asks the filesystem(s)
to flush their data.
Traversing the structures leads to quadratic complexity in
check_barrier_error(), where the checkpoint list is traversed for
each iteration of device list.
> @@ -3574,8 +3654,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
> {
> struct list_head *head;
> struct btrfs_device *dev;
> - int dropouts = 0;
> int ret;
> + static LIST_HEAD(cp_list);
^^^^^^
What is this supposed to mean? What if several filesystems end up in
barrier_all_devices modifying the list?
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH V2 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-28 16:19 ` David Sterba
@ 2017-03-29 10:00 ` Anand Jain
0 siblings, 0 replies; 22+ messages in thread
From: Anand Jain @ 2017-03-29 10:00 UTC (permalink / raw)
To: dsterba, linux-btrfs, quwenruo
On 03/29/2017 12:19 AM, David Sterba wrote:
> On Tue, Mar 14, 2017 at 04:26:11PM +0800, Anand Jain wrote:
>> The objective of this patch is to cleanup barrier_all_devices()
>> so that the error checking is in a separate loop independent of
>> of the loop which submits and waits on the device flush requests.
>
> I think that getting completely rid of the ENOMEM as suggested under
> patch 2, the split would not be necessary.
>
> The send failures will be gone so we can simply count failures from the
> waiting write_dev_flush(..., 1). There any returned error also implies
> the device stat increase for FLUSH_ERRS. Then barrier_all_devices will
> be a bit simpler.
However we need per device flush error, so we can deduce if
the volume is degrade-mountable [1]
(and so the temporary shelter for the per-device error was necessary).
[1] btrfs: Introduce a function to check if all chunks a OK for
degraded rw mount
>> By doing this it helps to further develop patches which would tune
>> the error-actions as needed.
>>
>> Here functions such as btrfs_dev_stats_dirty() couldn't be used
>> because it doesn't monitor the flush errors BTRFS_DEV_STAT_FLUSH_ERRS.
>>
>> Signed-off-by: Anand Jain <anand.jain@oracle.com>
>> ---
>> v2: Address Qu review comments viz..
>> Add meaningful names, like cp_list (for checkpoint_list head).
>> (And actually it does not need a new struct type just to hold
>> the head pointer, list node is already named as device_checkpoint).
>> Check return value of add_device_checkpoint()
>> Check if the device is already added at add_device_checkpoint()
>> Rename fini_devices_checkpoint() to rel_devices_checkpoint()
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 5719e036048b..d0c401884643 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -3566,6 +3566,86 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
>> return 0;
>> }
>>
>> +struct device_checkpoint {
>> + struct list_head cp_node;
>> + struct btrfs_device *device;
>> + int stat_value_checkpoint;
>> +};
>
> I find this approach particularly bad for several reasons.
>
> You need to store just one int per device but introduce a bloated
> temporary structure. If anything, the int could be embeded in
> btrfs_device, set and checked at the right points. Instead, you add
> memory allocations to a critical context and more potential failure
> paths.
Right. As of now we are using dev_stats_ccnt to flag if the dev_stat
contents have changed in general, however its not specific to one of
the 5 error in which flush error is one among them. Here we are
tracking only the flush errors occurred in this-commit do-barrier
request.
I think one other idea is to track flush error similar to
dev_stats_ccnt but limited only to flush error. However in the long
term we may end up similarly tracking the other errors too,
and I was bit concerned about that, so limited the changes to local.
But now I think its good to try if you are ok, Unless there is any
other better way ?
Thanks for pointing out other misses as below, the above new plan
will fix them as well.
Thanks, Anand
>> +static int add_device_checkpoint(struct btrfs_device *device,
>> + struct list_head *cp_list)
>> +{
> ...
>> + cdev = kzalloc(sizeof(struct device_checkpoint), GFP_KERNEL);
> ...
>> +}
>
> The memory allocations are GFP_KERNEL, this could lead to strange
> lockups if the allocator tries to free memory and asks the filesystem(s)
> to flush their data.
>
> Traversing the structures leads to quadratic complexity in
> check_barrier_error(), where the checkpoint list is traversed for
> each iteration of device list.
>> @@ -3574,8 +3654,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
>> {
>> struct list_head *head;
>> struct btrfs_device *dev;
>> - int dropouts = 0;
>> int ret;
>> + static LIST_HEAD(cp_list);
> ^^^^^^
>
> What is this supposed to mean? What if several filesystems end up in
> barrier_all_devices modifying the list?
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 4/4 V2] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 7:42 ` [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error Anand Jain
2017-03-13 9:05 ` Qu Wenruo
2017-03-14 8:26 ` [PATCH V2 " Anand Jain
@ 2017-03-31 11:36 ` Anand Jain
2017-04-05 4:07 ` [PATCH 4/4 V3] " Anand Jain
3 siblings, 0 replies; 22+ messages in thread
From: Anand Jain @ 2017-03-31 11:36 UTC (permalink / raw)
To: linux-btrfs; +Cc: dsterba
The objective of this patch is to cleanup barrier_all_devices()
so that the error checking is in a separate loop independent of
of the loop which submits and waits on the device flush requests.
By doing this it helps to further develop patches which would tune
the error-actions as needed.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
V2: Now the flush error return is saved and checked instead of the
checkpoint of the dev_stat method earlier.
fs/btrfs/disk-io.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f8f534a32c2f..b6d047250ce2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3535,6 +3535,23 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
return 0;
}
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
+{
+ int dropouts = 0;
+ struct btrfs_device *dev;
+
+ list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+ if (!dev->bdev || dev->last_flush_error)
+ dropouts++;
+ }
+
+ if (dropouts >
+ fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+ return -EIO;
+
+ return 0;
+}
+
/*
* send an empty flush down to each device in parallel,
* then wait for them
@@ -3572,8 +3589,19 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
if (write_dev_flush(dev, 1))
dropouts++;
}
- if (dropouts > info->num_tolerated_disk_barrier_failures)
- return -EIO;
+
+ /*
+ * A slight optimization, we check for dropouts here which avoids
+ * a dev list loop when disks are healthy.
+ */
+ if (dropouts) {
+ /*
+ * As we need holistic view of the failed disks, so
+ * error checking is pushed to a separate loop.
+ */
+ return check_barrier_error(info->fs_devices);
+ }
+
return 0;
}
--
2.10.0
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 4/4 V3] btrfs: cleanup barrier_all_devices() to check dev stat flush error
2017-03-13 7:42 ` [PATCH 4/4] btrfs: cleanup barrier_all_devices() to check dev stat flush error Anand Jain
` (2 preceding siblings ...)
2017-03-31 11:36 ` [PATCH 4/4 V2] " Anand Jain
@ 2017-04-05 4:07 ` Anand Jain
3 siblings, 0 replies; 22+ messages in thread
From: Anand Jain @ 2017-04-05 4:07 UTC (permalink / raw)
To: linux-btrfs; +Cc: dsterba
The objective of this patch is to cleanup barrier_all_devices()
so that the error checking is in a separate loop independent of
of the loop which submits and waits on the device flush requests.
By doing this it helps to further develop patches which would tune
the error-actions as needed.
Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2: Address Qu review comments viz..
Add meaningful names, like cp_list (for checkpoint_list head).
(And actually it does not need a new struct type just to hold
the head pointer, list node is already named as device_checkpoint).
Check return value of add_device_checkpoint()
Check if the device is already added at add_device_checkpoint()
Rename fini_devices_checkpoint() to rel_devices_checkpoint()
v3:
(resent with the correct version of the patch).
Now the flush error return is saved and checked instead of the
checkpoint of the dev_stat method earlier.
fs/btrfs/disk-io.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 420753d37e1a..3c476b118440 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3538,6 +3538,23 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
return 0;
}
+static int check_barrier_error(struct btrfs_fs_devices *fsdevs)
+{
+ int dropouts = 0;
+ struct btrfs_device *dev;
+
+ list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) {
+ if (!dev->bdev || dev->last_flush_error)
+ dropouts++;
+ }
+
+ if (dropouts >
+ fsdevs->fs_info->num_tolerated_disk_barrier_failures)
+ return -EIO;
+
+ return 0;
+}
+
/*
* send an empty flush down to each device in parallel,
* then wait for them
@@ -3575,8 +3592,19 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
if (write_dev_flush(dev, 1))
dropouts++;
}
- if (dropouts > info->num_tolerated_disk_barrier_failures)
- return -EIO;
+
+ /*
+ * A slight optimization, we check for dropouts here which avoids
+ * a dev list loop when disks are healthy.
+ */
+ if (dropouts) {
+ /*
+ * As we need holistic view of the failed disks, so
+ * error checking is pushed to a separate loop.
+ */
+ return check_barrier_error(info->fs_devices);
+ }
+
return 0;
}
--
2.10.0
^ permalink raw reply related [flat|nested] 22+ messages in thread