* [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-14 1:49 [PATCH v2 0/4] md/raid10: reduce lock contention for io Yu Kuai
@ 2022-09-14 1:49 ` Yu Kuai
2022-09-14 6:36 ` Paul Menzel
2022-09-14 16:16 ` Logan Gunthorpe
2022-09-14 1:49 ` [PATCH v2 2/4] md/raid10: prevent unnecessary calls to wake_up() in fast path Yu Kuai
` (2 subsequent siblings)
3 siblings, 2 replies; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 1:49 UTC (permalink / raw)
To: song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yukuai3, yukuai1, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
Currently the nasty condition is wait_barrier() is hard to read. This
patch factor out the condition into a function.
There are no functional changes.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 64d6e4cd8a3a..56458a53043d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
wake_up(&conf->wait_barrier);
}
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+ /* barrier is dropped */
+ if (!conf->barrier)
+ return true;
+
+ /*
+ * If there are already pending requests (preventing the barrier from
+ * rising completely), and the pre-process bio queue isn't empty, then
+ * don't wait, as we need to empty that queue to get the nr_pending
+ * count down.
+ */
+ if (atomic_read(&conf->nr_pending)) {
+ struct bio_list *bio_list = current->bio_list;
+
+ if (bio_list && (!bio_list_empty(&bio_list[0]) ||
+ !bio_list_empty(&bio_list[1])))
+ return true;
+ }
+
+ /* move on if recovery thread is blocked by us */
+ if (conf->mddev->thread->tsk == current &&
+ test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+ conf->nr_queued > 0)
+ return true;
+
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;
spin_lock_irq(&conf->resync_lock);
if (conf->barrier) {
- struct bio_list *bio_list = current->bio_list;
- conf->nr_waiting++;
- /* Wait for the barrier to drop.
- * However if there are already pending
- * requests (preventing the barrier from
- * rising completely), and the
- * pre-process bio queue isn't empty,
- * then don't wait, as we need to empty
- * that queue to get the nr_pending
- * count down.
- */
/* Return false when nowait flag is set */
if (nowait) {
ret = false;
} else {
+ conf->nr_waiting++;
raid10_log(conf->mddev, "wait barrier");
wait_event_lock_irq(conf->wait_barrier,
- !conf->barrier ||
- (atomic_read(&conf->nr_pending) &&
- bio_list &&
- (!bio_list_empty(&bio_list[0]) ||
- !bio_list_empty(&bio_list[1]))) ||
- /* move on if recovery thread is
- * blocked by us
- */
- (conf->mddev->thread->tsk == current &&
- test_bit(MD_RECOVERY_RUNNING,
- &conf->mddev->recovery) &&
- conf->nr_queued > 0),
+ stop_waiting_barrier(conf),
conf->resync_lock);
+ conf->nr_waiting--;
}
- conf->nr_waiting--;
if (!conf->nr_waiting)
wake_up(&conf->wait_barrier);
}
--
2.31.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-14 1:49 ` [PATCH v2 1/4] md/raid10: cleanup wait_barrier() Yu Kuai
@ 2022-09-14 6:36 ` Paul Menzel
2022-09-14 10:28 ` Yu Kuai
2022-09-14 16:16 ` Logan Gunthorpe
1 sibling, 1 reply; 12+ messages in thread
From: Paul Menzel @ 2022-09-14 6:36 UTC (permalink / raw)
To: Yu Kuai
Cc: song, logang, guoqing.jiang, linux-raid, linux-kernel, yukuai3, yi.zhang
Dear Yu,
Thank you for the improved patch. Three minor nits.
Am 14.09.22 um 03:49 schrieb Yu Kuai:
> From: Yu Kuai <yukuai3@huawei.com>
In the summary/title, I’d spell *Clean up* with a space. Maybe even use:
md/raid10: Factor out code from wait_barrier() to stop_waiting_barrier()
> Currently the nasty condition is wait_barrier() is hard to read. This
> patch factor out the condition into a function.
The first *is* above can be removed, and factor*s* needs an s.
> There are no functional changes.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
> 1 file changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 64d6e4cd8a3a..56458a53043d 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
> wake_up(&conf->wait_barrier);
> }
>
> +static bool stop_waiting_barrier(struct r10conf *conf)
> +{
> + /* barrier is dropped */
> + if (!conf->barrier)
> + return true;
> +
> + /*
> + * If there are already pending requests (preventing the barrier from
> + * rising completely), and the pre-process bio queue isn't empty, then
> + * don't wait, as we need to empty that queue to get the nr_pending
> + * count down.
> + */
> + if (atomic_read(&conf->nr_pending)) {
> + struct bio_list *bio_list = current->bio_list;
> +
> + if (bio_list && (!bio_list_empty(&bio_list[0]) ||
> + !bio_list_empty(&bio_list[1])))
> + return true;
> + }
> +
> + /* move on if recovery thread is blocked by us */
> + if (conf->mddev->thread->tsk == current &&
> + test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
> + conf->nr_queued > 0)
> + return true;
> +
> + return false;
> +}
> +
> static bool wait_barrier(struct r10conf *conf, bool nowait)
> {
> bool ret = true;
>
> spin_lock_irq(&conf->resync_lock);
> if (conf->barrier) {
> - struct bio_list *bio_list = current->bio_list;
> - conf->nr_waiting++;
> - /* Wait for the barrier to drop.
> - * However if there are already pending
> - * requests (preventing the barrier from
> - * rising completely), and the
> - * pre-process bio queue isn't empty,
> - * then don't wait, as we need to empty
> - * that queue to get the nr_pending
> - * count down.
> - */
> /* Return false when nowait flag is set */
> if (nowait) {
> ret = false;
> } else {
> + conf->nr_waiting++;
> raid10_log(conf->mddev, "wait barrier");
> wait_event_lock_irq(conf->wait_barrier,
> - !conf->barrier ||
> - (atomic_read(&conf->nr_pending) &&
> - bio_list &&
> - (!bio_list_empty(&bio_list[0]) ||
> - !bio_list_empty(&bio_list[1]))) ||
> - /* move on if recovery thread is
> - * blocked by us
> - */
> - (conf->mddev->thread->tsk == current &&
> - test_bit(MD_RECOVERY_RUNNING,
> - &conf->mddev->recovery) &&
> - conf->nr_queued > 0),
> + stop_waiting_barrier(conf),
> conf->resync_lock);
> + conf->nr_waiting--;
> }
> - conf->nr_waiting--;
> if (!conf->nr_waiting)
> wake_up(&conf->wait_barrier);
> }
Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
Kind regards,
Paul
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-14 6:36 ` Paul Menzel
@ 2022-09-14 10:28 ` Yu Kuai
0 siblings, 0 replies; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 10:28 UTC (permalink / raw)
To: Paul Menzel, Yu Kuai
Cc: song, logang, guoqing.jiang, linux-raid, linux-kernel, yi.zhang,
yukuai (C)
Hi, Paul
在 2022/09/14 14:36, Paul Menzel 写道:
> Dear Yu,
>
>
> Thank you for the improved patch. Three minor nits.
>
> Am 14.09.22 um 03:49 schrieb Yu Kuai:
>> From: Yu Kuai <yukuai3@huawei.com>
>
> In the summary/title, I’d spell *Clean up* with a space. Maybe even use:
>
> md/raid10: Factor out code from wait_barrier() to stop_waiting_barrier()
Ok, sounds good, I'll change that in next version.
>
>> Currently the nasty condition is wait_barrier() is hard to read. This
>> patch factor out the condition into a function.
>
> The first *is* above can be removed, and factor*s* needs an s.
Yes, you're right.
>> There are no functional changes.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
>> 1 file changed, 32 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 64d6e4cd8a3a..56458a53043d 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
>> wake_up(&conf->wait_barrier);
>> }
>> +static bool stop_waiting_barrier(struct r10conf *conf)
>> +{
>> + /* barrier is dropped */
>> + if (!conf->barrier)
>> + return true;
>> +
>> + /*
>> + * If there are already pending requests (preventing the barrier
>> from
>> + * rising completely), and the pre-process bio queue isn't empty,
>> then
>> + * don't wait, as we need to empty that queue to get the nr_pending
>> + * count down.
>> + */
>> + if (atomic_read(&conf->nr_pending)) {
>> + struct bio_list *bio_list = current->bio_list;
>> +
>> + if (bio_list && (!bio_list_empty(&bio_list[0]) ||
>> + !bio_list_empty(&bio_list[1])))
>> + return true;
>> + }
>> +
>> + /* move on if recovery thread is blocked by us */
>> + if (conf->mddev->thread->tsk == current &&
>> + test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
>> + conf->nr_queued > 0)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> static bool wait_barrier(struct r10conf *conf, bool nowait)
>> {
>> bool ret = true;
>> spin_lock_irq(&conf->resync_lock);
>> if (conf->barrier) {
>> - struct bio_list *bio_list = current->bio_list;
>> - conf->nr_waiting++;
>> - /* Wait for the barrier to drop.
>> - * However if there are already pending
>> - * requests (preventing the barrier from
>> - * rising completely), and the
>> - * pre-process bio queue isn't empty,
>> - * then don't wait, as we need to empty
>> - * that queue to get the nr_pending
>> - * count down.
>> - */
>> /* Return false when nowait flag is set */
>> if (nowait) {
>> ret = false;
>> } else {
>> + conf->nr_waiting++;
>> raid10_log(conf->mddev, "wait barrier");
>> wait_event_lock_irq(conf->wait_barrier,
>> - !conf->barrier ||
>> - (atomic_read(&conf->nr_pending) &&
>> - bio_list &&
>> - (!bio_list_empty(&bio_list[0]) ||
>> - !bio_list_empty(&bio_list[1]))) ||
>> - /* move on if recovery thread is
>> - * blocked by us
>> - */
>> - (conf->mddev->thread->tsk == current &&
>> - test_bit(MD_RECOVERY_RUNNING,
>> - &conf->mddev->recovery) &&
>> - conf->nr_queued > 0),
>> + stop_waiting_barrier(conf),
>> conf->resync_lock);
>> + conf->nr_waiting--;
>> }
>> - conf->nr_waiting--;
>> if (!conf->nr_waiting)
>> wake_up(&conf->wait_barrier);
>> }
>
> Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
Thanks,
Kuai
>
>
> Kind regards,
>
> Paul
>
> .
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-14 1:49 ` [PATCH v2 1/4] md/raid10: cleanup wait_barrier() Yu Kuai
2022-09-14 6:36 ` Paul Menzel
@ 2022-09-14 16:16 ` Logan Gunthorpe
2022-09-15 7:21 ` Yu Kuai
1 sibling, 1 reply; 12+ messages in thread
From: Logan Gunthorpe @ 2022-09-14 16:16 UTC (permalink / raw)
To: Yu Kuai, song, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yukuai3, yi.zhang
On 2022-09-13 19:49, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently the nasty condition is wait_barrier() is hard to read. This
> patch factor out the condition into a function.
>
> There are no functional changes.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
> 1 file changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 64d6e4cd8a3a..56458a53043d 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
> wake_up(&conf->wait_barrier);
> }
>
> +static bool stop_waiting_barrier(struct r10conf *conf)
> +{
> + /* barrier is dropped */
> + if (!conf->barrier)
> + return true;
> +
> + /*
> + * If there are already pending requests (preventing the barrier from
> + * rising completely), and the pre-process bio queue isn't empty, then
> + * don't wait, as we need to empty that queue to get the nr_pending
> + * count down.
> + */
> + if (atomic_read(&conf->nr_pending)) {
> + struct bio_list *bio_list = current->bio_list;
I'd probably just put the bio_list declaration at the top of this
function, then the nested if statements are not necessary. The compiler
should be able to optimize the access just fine.
> if (conf->barrier) {
> - struct bio_list *bio_list = current->bio_list;
> - conf->nr_waiting++;
> - /* Wait for the barrier to drop.
> - * However if there are already pending
> - * requests (preventing the barrier from
> - * rising completely), and the
> - * pre-process bio queue isn't empty,
> - * then don't wait, as we need to empty
> - * that queue to get the nr_pending
> - * count down.
> - */
> /* Return false when nowait flag is set */
> if (nowait) {
> ret = false;
> } else {
> + conf->nr_waiting++;
Technically speaking, I think moving nr_waiting counts as a functional
change. As best as I can see it is correct, but it should probably be at
least mentioned in the commit message, or maybe done as a separate
commit with it's own justification. That way if it causes problems down
the road, a bisect will make the issue clearer.
Thanks,
Logan
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-14 16:16 ` Logan Gunthorpe
@ 2022-09-15 7:21 ` Yu Kuai
2022-09-15 9:12 ` Paul Menzel
0 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2022-09-15 7:21 UTC (permalink / raw)
To: Logan Gunthorpe, Yu Kuai, song, guoqing.jiang, pmenzel, Paul Menzel
Cc: linux-raid, linux-kernel, yi.zhang, yukuai (C)
Hi,
在 2022/09/15 0:16, Logan Gunthorpe 写道:
>
>
> On 2022-09-13 19:49, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently the nasty condition is wait_barrier() is hard to read. This
>> patch factor out the condition into a function.
>>
>> There are no functional changes.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
>> 1 file changed, 32 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 64d6e4cd8a3a..56458a53043d 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
>> wake_up(&conf->wait_barrier);
>> }
>>
>> +static bool stop_waiting_barrier(struct r10conf *conf)
>> +{
>> + /* barrier is dropped */
>> + if (!conf->barrier)
>> + return true;
>> +
>> + /*
>> + * If there are already pending requests (preventing the barrier from
>> + * rising completely), and the pre-process bio queue isn't empty, then
>> + * don't wait, as we need to empty that queue to get the nr_pending
>> + * count down.
>> + */
>> + if (atomic_read(&conf->nr_pending)) {
>> + struct bio_list *bio_list = current->bio_list;
>
> I'd probably just put the bio_list declaration at the top of this
> function, then the nested if statements are not necessary. The compiler
> should be able to optimize the access just fine.
>
>> if (conf->barrier) {
>> - struct bio_list *bio_list = current->bio_list;
>> - conf->nr_waiting++;
>> - /* Wait for the barrier to drop.
>> - * However if there are already pending
>> - * requests (preventing the barrier from
>> - * rising completely), and the
>> - * pre-process bio queue isn't empty,
>> - * then don't wait, as we need to empty
>> - * that queue to get the nr_pending
>> - * count down.
>> - */
>> /* Return false when nowait flag is set */
>> if (nowait) {
>> ret = false;
>> } else {
>> + conf->nr_waiting++;
>
> Technically speaking, I think moving nr_waiting counts as a functional
> change. As best as I can see it is correct, but it should probably be at
> least mentioned in the commit message, or maybe done as a separate
> commit with it's own justification. That way if it causes problems down
> the road, a bisect will make the issue clearer.
Thanks for your advice. I just think increasing and decreasing nr_waiting
in the 'nowait' case is pointless, so I moved it incidentally.
I'll post a separate clean-up patch to do that.
Paul, can I still add your Acked-by for this patch?
Thanks,
Kuai
>
> Thanks,
>
> Logan
> .
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/4] md/raid10: cleanup wait_barrier()
2022-09-15 7:21 ` Yu Kuai
@ 2022-09-15 9:12 ` Paul Menzel
0 siblings, 0 replies; 12+ messages in thread
From: Paul Menzel @ 2022-09-15 9:12 UTC (permalink / raw)
To: Yu Kuai
Cc: Logan Gunthorpe, song, guoqing.jiang, linux-raid, linux-kernel,
yi.zhang, yukuai (C)
Dear Yu,
Am 15.09.22 um 09:21 schrieb Yu Kuai:
> 在 2022/09/15 0:16, Logan Gunthorpe 写道:
>> On 2022-09-13 19:49, Yu Kuai wrote:
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently the nasty condition is wait_barrier() is hard to read. This
>>> patch factor out the condition into a function.
>>>
>>> There are no functional changes.
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>> drivers/md/raid10.c | 56 ++++++++++++++++++++++++++-------------------
>>> 1 file changed, 32 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index 64d6e4cd8a3a..56458a53043d 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -957,44 +957,52 @@ static void lower_barrier(struct r10conf *conf)
>>> wake_up(&conf->wait_barrier);
>>> }
>>> +static bool stop_waiting_barrier(struct r10conf *conf)
>>> +{
>>> + /* barrier is dropped */
>>> + if (!conf->barrier)
>>> + return true;
>>> +
>>> + /*
>>> + * If there are already pending requests (preventing the barrier
>>> from
>>> + * rising completely), and the pre-process bio queue isn't
>>> empty, then
>>> + * don't wait, as we need to empty that queue to get the nr_pending
>>> + * count down.
>>> + */
>>> + if (atomic_read(&conf->nr_pending)) {
>>> + struct bio_list *bio_list = current->bio_list;
>>
>> I'd probably just put the bio_list declaration at the top of this
>> function, then the nested if statements are not necessary. The compiler
>> should be able to optimize the access just fine.
>>
>>> if (conf->barrier) {
>>> - struct bio_list *bio_list = current->bio_list;
>>> - conf->nr_waiting++;
>>> - /* Wait for the barrier to drop.
>>> - * However if there are already pending
>>> - * requests (preventing the barrier from
>>> - * rising completely), and the
>>> - * pre-process bio queue isn't empty,
>>> - * then don't wait, as we need to empty
>>> - * that queue to get the nr_pending
>>> - * count down.
>>> - */
>>> /* Return false when nowait flag is set */
>>> if (nowait) {
>>> ret = false;
>>> } else {
>>> + conf->nr_waiting++;
>>
>> Technically speaking, I think moving nr_waiting counts as a functional
>> change. As best as I can see it is correct, but it should probably be at
>> least mentioned in the commit message, or maybe done as a separate
>> commit with it's own justification. That way if it causes problems down
>> the road, a bisect will make the issue clearer.
>
> Thanks for your advice, I just think increase and decrease nr_waiting in
> the case 'nowait' is pointless, and I move it incidentally.
>
> I'll post a separate clean up patch to do that.
>
> Paul, can I still add your Acked-by for this patch?
Yes, sure.
Kind regards,
Paul
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/4] md/raid10: prevent unnecessary calls to wake_up() in fast path
2022-09-14 1:49 [PATCH v2 0/4] md/raid10: reduce lock contention for io Yu Kuai
2022-09-14 1:49 ` [PATCH v2 1/4] md/raid10: cleanup wait_barrier() Yu Kuai
@ 2022-09-14 1:49 ` Yu Kuai
2022-09-14 10:21 ` Yu Kuai
2022-09-14 1:49 ` [PATCH v2 3/4] md/raid10: fix improper BUG_ON() in raise_barrier() Yu Kuai
2022-09-14 1:49 ` [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock Yu Kuai
3 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 1:49 UTC (permalink / raw)
To: song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yukuai3, yukuai1, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
Currently, wake_up() is called unconditionally in fast path such as
raid10_make_request(), which will cause lock contention under high
concurrency:
raid10_make_request
wake_up
__wake_up_common_lock
spin_lock_irqsave
Improve performance by only calling wake_up() if the waitqueue is not empty.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/raid10.c | 26 ++++++++++++++++----------
1 file changed, 16 insertions(+), 10 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 56458a53043d..0edcd98461fe 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -274,6 +274,12 @@ static void put_buf(struct r10bio *r10_bio)
lower_barrier(conf);
}
+static void wake_up_barrier(struct r10conf *conf)
+{
+ if (wq_has_sleeper(&conf->wait_barrier))
+ wake_up(&conf->wait_barrier);
+}
+
static void reschedule_retry(struct r10bio *r10_bio)
{
unsigned long flags;
@@ -286,7 +292,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
spin_unlock_irqrestore(&conf->device_lock, flags);
/* wake up frozen array... */
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
md_wakeup_thread(mddev->thread);
}
@@ -884,7 +890,7 @@ static void flush_pending_writes(struct r10conf *conf)
/* flush any pending bitmap writes to disk
* before proceeding w/ I/O */
md_bitmap_unplug(conf->mddev->bitmap);
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
@@ -954,7 +960,7 @@ static void lower_barrier(struct r10conf *conf)
spin_lock_irqsave(&conf->resync_lock, flags);
conf->barrier--;
spin_unlock_irqrestore(&conf->resync_lock, flags);
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
}
static bool stop_waiting_barrier(struct r10conf *conf)
@@ -1004,7 +1010,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
conf->nr_waiting--;
}
if (!conf->nr_waiting)
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
}
/* Only increment nr_pending when we wait */
if (ret)
@@ -1017,7 +1023,7 @@ static void allow_barrier(struct r10conf *conf)
{
if ((atomic_dec_and_test(&conf->nr_pending)) ||
(conf->array_freeze_pending))
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
}
static void freeze_array(struct r10conf *conf, int extra)
@@ -1053,7 +1059,7 @@ static void unfreeze_array(struct r10conf *conf)
spin_lock_irq(&conf->resync_lock);
conf->barrier--;
conf->nr_waiting--;
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
spin_unlock_irq(&conf->resync_lock);
}
@@ -1078,7 +1084,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock);
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
md_wakeup_thread(mddev->thread);
kfree(plug);
return;
@@ -1087,7 +1093,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
/* we aren't scheduling, so we can do the write-out directly. */
bio = bio_list_get(&plug->pending);
md_bitmap_unplug(mddev->bitmap);
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
@@ -1893,7 +1899,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
__make_request(mddev, bio, sectors);
/* In case raid10d snuck in to freeze_array */
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
return true;
}
@@ -3040,7 +3046,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
* In case freeze_array() is waiting for condition
* nr_pending == nr_queued + extra to be true.
*/
- wake_up(&conf->wait_barrier);
+ wake_up_barrier(conf);
md_wakeup_thread(conf->mddev->thread);
} else {
if (test_bit(R10BIO_WriteError,
--
2.31.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/4] md/raid10: prevent unnecessary calls to wake_up() in fast path
2022-09-14 1:49 ` [PATCH v2 2/4] md/raid10: prevent unnecessary calls to wake_up() in fast path Yu Kuai
@ 2022-09-14 10:21 ` Yu Kuai
0 siblings, 0 replies; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 10:21 UTC (permalink / raw)
To: Yu Kuai, song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yi.zhang, yukuai (C)
在 2022/09/14 9:49, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently, wake_up() is called unconditionally in fast path such as
> raid10_make_request(), which will cause lock contention under high
> concurrency:
>
> raid10_make_request
> wake_up
> __wake_up_common_lock
> spin_lock_irqsave
>
> Improve performance by only call wake_up() if waitqueue is not empty.
>
Hi,
I'm replacing all the wake_up() calls here; currently I'm not quite sure it's
OK, since "conf->wait_barrier" is used for many purposes.
Perhaps should I just replace the hot path here? (raid10_make_request()
and allow_barrier().)
Thanks,
Kuai
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid10.c | 26 ++++++++++++++++----------
> 1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 56458a53043d..0edcd98461fe 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -274,6 +274,12 @@ static void put_buf(struct r10bio *r10_bio)
> lower_barrier(conf);
> }
>
> +static void wake_up_barrier(struct r10conf *conf)
> +{
> + if (wq_has_sleeper(&conf->wait_barrier))
> + wake_up(&conf->wait_barrier);
> +}
> +
> static void reschedule_retry(struct r10bio *r10_bio)
> {
> unsigned long flags;
> @@ -286,7 +292,7 @@ static void reschedule_retry(struct r10bio *r10_bio)
> spin_unlock_irqrestore(&conf->device_lock, flags);
>
> /* wake up frozen array... */
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
>
> md_wakeup_thread(mddev->thread);
> }
> @@ -884,7 +890,7 @@ static void flush_pending_writes(struct r10conf *conf)
> /* flush any pending bitmap writes to disk
> * before proceeding w/ I/O */
> md_bitmap_unplug(conf->mddev->bitmap);
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
>
> while (bio) { /* submit pending writes */
> struct bio *next = bio->bi_next;
> @@ -954,7 +960,7 @@ static void lower_barrier(struct r10conf *conf)
> spin_lock_irqsave(&conf->resync_lock, flags);
> conf->barrier--;
> spin_unlock_irqrestore(&conf->resync_lock, flags);
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> }
>
> static bool stop_waiting_barrier(struct r10conf *conf)
> @@ -1004,7 +1010,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
> conf->nr_waiting--;
> }
> if (!conf->nr_waiting)
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> }
> /* Only increment nr_pending when we wait */
> if (ret)
> @@ -1017,7 +1023,7 @@ static void allow_barrier(struct r10conf *conf)
> {
> if ((atomic_dec_and_test(&conf->nr_pending)) ||
> (conf->array_freeze_pending))
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> }
>
> static void freeze_array(struct r10conf *conf, int extra)
> @@ -1053,7 +1059,7 @@ static void unfreeze_array(struct r10conf *conf)
> spin_lock_irq(&conf->resync_lock);
> conf->barrier--;
> conf->nr_waiting--;
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> spin_unlock_irq(&conf->resync_lock);
> }
>
> @@ -1078,7 +1084,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
> spin_lock_irq(&conf->device_lock);
> bio_list_merge(&conf->pending_bio_list, &plug->pending);
> spin_unlock_irq(&conf->device_lock);
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> md_wakeup_thread(mddev->thread);
> kfree(plug);
> return;
> @@ -1087,7 +1093,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
> /* we aren't scheduling, so we can do the write-out directly. */
> bio = bio_list_get(&plug->pending);
> md_bitmap_unplug(mddev->bitmap);
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
>
> while (bio) { /* submit pending writes */
> struct bio *next = bio->bi_next;
> @@ -1893,7 +1899,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
> __make_request(mddev, bio, sectors);
>
> /* In case raid10d snuck in to freeze_array */
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> return true;
> }
>
> @@ -3040,7 +3046,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
> * In case freeze_array() is waiting for condition
> * nr_pending == nr_queued + extra to be true.
> */
> - wake_up(&conf->wait_barrier);
> + wake_up_barrier(conf);
> md_wakeup_thread(conf->mddev->thread);
> } else {
> if (test_bit(R10BIO_WriteError,
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 3/4] md/raid10: fix improper BUG_ON() in raise_barrier()
2022-09-14 1:49 [PATCH v2 0/4] md/raid10: reduce lock contention for io Yu Kuai
2022-09-14 1:49 ` [PATCH v2 1/4] md/raid10: cleanup wait_barrier() Yu Kuai
2022-09-14 1:49 ` [PATCH v2 2/4] md/raid10: prevent unnecessary calls to wake_up() in fast path Yu Kuai
@ 2022-09-14 1:49 ` Yu Kuai
2022-09-14 1:49 ` [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock Yu Kuai
3 siblings, 0 replies; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 1:49 UTC (permalink / raw)
To: song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yukuai3, yukuai1, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
'conf->barrier' is protected by 'conf->resync_lock', reading
'conf->barrier' without holding the lock is wrong.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/raid10.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0edcd98461fe..377d4641bb54 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -936,8 +936,8 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
{
- BUG_ON(force && !conf->barrier);
spin_lock_irq(&conf->resync_lock);
+ BUG_ON(force && !conf->barrier);
/* Wait until no block IO is waiting (unless 'force') */
wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
--
2.31.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock
2022-09-14 1:49 [PATCH v2 0/4] md/raid10: reduce lock contention for io Yu Kuai
` (2 preceding siblings ...)
2022-09-14 1:49 ` [PATCH v2 3/4] md/raid10: fix improper BUG_ON() in raise_barrier() Yu Kuai
@ 2022-09-14 1:49 ` Yu Kuai
2022-09-14 10:26 ` Yu Kuai
3 siblings, 1 reply; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 1:49 UTC (permalink / raw)
To: song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yukuai3, yukuai1, yi.zhang
From: Yu Kuai <yukuai3@huawei.com>
Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
and io can't be dispatched until 'barrier' is dropped.
Since holding the 'barrier' is not common, convert 'resync_lock' to use
seqlock so that holding lock can be avoided in fast path.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/raid10.c | 85 +++++++++++++++++++++++++++++----------------
drivers/md/raid10.h | 2 +-
2 files changed, 57 insertions(+), 30 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 377d4641bb54..6c2396fe75a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
#include "raid1-10.c"
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+ do { \
+ write_sequnlock_irq(&(conf)->resync_lock); \
+ cmd; \
+ } while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+ wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+ cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+ wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
/*
* for resync bio, r10bio pointer can be retrieved from the per-bio
* 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
{
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
BUG_ON(force && !conf->barrier);
/* Wait until no block IO is waiting (unless 'force') */
- wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock);
+ wait_event_barrier(conf, force || !conf->nr_waiting);
/* block any new IO from starting */
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
/* Now wait for all pending IO to complete */
- wait_event_lock_irq(conf->wait_barrier,
- !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock);
+ wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+ conf->barrier < RESYNC_DEPTH);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void lower_barrier(struct r10conf *conf)
{
unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->barrier--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+ write_seqlock_irqsave(&conf->resync_lock, flags);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
+ write_sequnlock_irqrestore(&conf->resync_lock, flags);
wake_up_barrier(conf);
}
@@ -992,11 +1006,29 @@ static bool stop_waiting_barrier(struct r10conf *conf)
return false;
}
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+ unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+ if (READ_ONCE(conf->barrier))
+ return false;
+
+ atomic_inc(&conf->nr_pending);
+ if (!read_seqretry(&conf->resync_lock, seq))
+ return true;
+
+ atomic_dec(&conf->nr_pending);
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;
- spin_lock_irq(&conf->resync_lock);
+ if (wait_barrier_nolock(conf))
+ return true;
+
+ write_seqlock_irq(&conf->resync_lock);
if (conf->barrier) {
/* Return false when nowait flag is set */
if (nowait) {
@@ -1004,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
} else {
conf->nr_waiting++;
raid10_log(conf->mddev, "wait barrier");
- wait_event_lock_irq(conf->wait_barrier,
- stop_waiting_barrier(conf),
- conf->resync_lock);
+ wait_event_barrier(conf, stop_waiting_barrier(conf));
conf->nr_waiting--;
}
if (!conf->nr_waiting)
@@ -1015,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
/* Only increment nr_pending when we wait */
if (ret)
atomic_inc(&conf->nr_pending);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
return ret;
}
@@ -1040,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
conf->array_freeze_pending++;
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
conf->nr_waiting++;
- wait_event_lock_irq_cmd(conf->wait_barrier,
- atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
- conf->resync_lock,
- flush_pending_writes(conf));
-
+ wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+ conf->nr_queued + extra, flush_pending_writes(conf));
conf->array_freeze_pending--;
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void unfreeze_array(struct r10conf *conf)
{
/* reverse the effect of the freeze */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
+ write_seqlock_irq(&conf->resync_lock);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
conf->nr_waiting--;
wake_up_barrier(conf);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4046,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);
- spin_lock_init(&conf->resync_lock);
+ seqlock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);
@@ -4365,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
- conf->barrier = 1;
+ WRITE_ONCE(conf->barrier, 1);
}
return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;
- spinlock_t resync_lock;
+ seqlock_t resync_lock;
atomic_t nr_pending;
int nr_waiting;
int nr_queued;
--
2.31.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock
2022-09-14 1:49 ` [PATCH v2 4/4] md/raid10: convert resync_lock to use seqlock Yu Kuai
@ 2022-09-14 10:26 ` Yu Kuai
0 siblings, 0 replies; 12+ messages in thread
From: Yu Kuai @ 2022-09-14 10:26 UTC (permalink / raw)
To: Yu Kuai, song, logang, guoqing.jiang, pmenzel
Cc: linux-raid, linux-kernel, yi.zhang, yukuai (C)
Hi,
在 2022/09/14 9:49, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
> and io can't be dispatched until 'barrier' is dropped.
>
> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> seqlock so that holding lock can be avoided in fast path.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid10.c | 85 +++++++++++++++++++++++++++++----------------
> drivers/md/raid10.h | 2 +-
> 2 files changed, 57 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 377d4641bb54..6c2396fe75a0 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>
> #include "raid1-10.c"
>
> +#define NULL_CMD
> +#define cmd_before(conf, cmd) \
> + do { \
> + write_sequnlock_irq(&(conf)->resync_lock); \
> + cmd; \
> + } while (0)
> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
> +
> +#define wait_event_barrier_cmd(conf, cond, cmd) \
> + wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
> + cmd_after(conf))
> +
> +#define wait_event_barrier(conf, cond) \
> + wait_event_barrier_cmd(conf, cond, NULL_CMD)
> +
> /*
> * for resync bio, r10bio pointer can be retrieved from the per-bio
> * 'struct resync_pages'.
> @@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
>
> static void raise_barrier(struct r10conf *conf, int force)
> {
> - spin_lock_irq(&conf->resync_lock);
> + write_seqlock_irq(&conf->resync_lock);
> BUG_ON(force && !conf->barrier);
>
> /* Wait until no block IO is waiting (unless 'force') */
> - wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
> - conf->resync_lock);
> + wait_event_barrier(conf, force || !conf->nr_waiting);
>
> /* block any new IO from starting */
> - conf->barrier++;
> + WRITE_ONCE(conf->barrier, conf->barrier + 1);
>
> /* Now wait for all pending IO to complete */
> - wait_event_lock_irq(conf->wait_barrier,
> - !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
> - conf->resync_lock);
> + wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
> + conf->barrier < RESYNC_DEPTH);
>
> - spin_unlock_irq(&conf->resync_lock);
> + write_sequnlock_irq(&conf->resync_lock);
> }
>
> static void lower_barrier(struct r10conf *conf)
> {
> unsigned long flags;
> - spin_lock_irqsave(&conf->resync_lock, flags);
> - conf->barrier--;
> - spin_unlock_irqrestore(&conf->resync_lock, flags);
> +
> + write_seqlock_irqsave(&conf->resync_lock, flags);
> + WRITE_ONCE(conf->barrier, conf->barrier - 1);
> + write_sequnlock_irqrestore(&conf->resync_lock, flags);
> wake_up_barrier(conf);
> }
>
> @@ -992,11 +1006,29 @@ static bool stop_waiting_barrier(struct r10conf *conf)
> return false;
> }
>
> +static bool wait_barrier_nolock(struct r10conf *conf)
> +{
> + unsigned int seq = read_seqbegin(&conf->resync_lock);
> +
> + if (READ_ONCE(conf->barrier))
> + return false;
> +
> + atomic_inc(&conf->nr_pending);
> + if (!read_seqretry(&conf->resync_lock, seq))
> + return true;
> +
> + atomic_dec(&conf->nr_pending);
During pressure testing, I found that this is problematic: raise_barrier()
can wait for nr_pending to become zero, and the increase and decrease here
will cause raise_barrier() to hang if nr_pending is decreased to 0 here.
I'll send a new version to fix this.
Thanks,
Kuai
> + return false;
> +}
> +
> static bool wait_barrier(struct r10conf *conf, bool nowait)
> {
> bool ret = true;
>
> - spin_lock_irq(&conf->resync_lock);
> + if (wait_barrier_nolock(conf))
> + return true;
> +
> + write_seqlock_irq(&conf->resync_lock);
> if (conf->barrier) {
> /* Return false when nowait flag is set */
> if (nowait) {
> @@ -1004,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
> } else {
> conf->nr_waiting++;
> raid10_log(conf->mddev, "wait barrier");
> - wait_event_lock_irq(conf->wait_barrier,
> - stop_waiting_barrier(conf),
> - conf->resync_lock);
> + wait_event_barrier(conf, stop_waiting_barrier(conf));
> conf->nr_waiting--;
> }
> if (!conf->nr_waiting)
> @@ -1015,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
> /* Only increment nr_pending when we wait */
> if (ret)
> atomic_inc(&conf->nr_pending);
> - spin_unlock_irq(&conf->resync_lock);
> + write_sequnlock_irq(&conf->resync_lock);
> return ret;
> }
>
> @@ -1040,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
> * must match the number of pending IOs (nr_pending) before
> * we continue.
> */
> - spin_lock_irq(&conf->resync_lock);
> + write_seqlock_irq(&conf->resync_lock);
> conf->array_freeze_pending++;
> - conf->barrier++;
> + WRITE_ONCE(conf->barrier, conf->barrier + 1);
> conf->nr_waiting++;
> - wait_event_lock_irq_cmd(conf->wait_barrier,
> - atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
> - conf->resync_lock,
> - flush_pending_writes(conf));
> -
> + wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
> + conf->nr_queued + extra, flush_pending_writes(conf));
> conf->array_freeze_pending--;
> - spin_unlock_irq(&conf->resync_lock);
> + write_sequnlock_irq(&conf->resync_lock);
> }
>
> static void unfreeze_array(struct r10conf *conf)
> {
> /* reverse the effect of the freeze */
> - spin_lock_irq(&conf->resync_lock);
> - conf->barrier--;
> + write_seqlock_irq(&conf->resync_lock);
> + WRITE_ONCE(conf->barrier, conf->barrier - 1);
> conf->nr_waiting--;
> wake_up_barrier(conf);
> - spin_unlock_irq(&conf->resync_lock);
> + write_sequnlock_irq(&conf->resync_lock);
> }
>
> static sector_t choose_data_offset(struct r10bio *r10_bio,
> @@ -4046,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
> INIT_LIST_HEAD(&conf->retry_list);
> INIT_LIST_HEAD(&conf->bio_end_io_list);
>
> - spin_lock_init(&conf->resync_lock);
> + seqlock_init(&conf->resync_lock);
> init_waitqueue_head(&conf->wait_barrier);
> atomic_set(&conf->nr_pending, 0);
>
> @@ -4365,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
> rdev->new_raid_disk = rdev->raid_disk * 2;
> rdev->sectors = size;
> }
> - conf->barrier = 1;
> + WRITE_ONCE(conf->barrier, 1);
> }
>
> return conf;
> diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
> index 5c0804d8bb1f..8c072ce0bc54 100644
> --- a/drivers/md/raid10.h
> +++ b/drivers/md/raid10.h
> @@ -76,7 +76,7 @@ struct r10conf {
> /* queue pending writes and submit them on unplug */
> struct bio_list pending_bio_list;
>
> - spinlock_t resync_lock;
> + seqlock_t resync_lock;
> atomic_t nr_pending;
> int nr_waiting;
> int nr_queued;
>
^ permalink raw reply [flat|nested] 12+ messages in thread