From: Jianchao Wang <jianchao.w.wang@oracle.com>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] blk-wbt: get back the missed wakeup from __wbt_done
Date: Thu, 23 Aug 2018 21:08:38 +0800 [thread overview]
Message-ID: <1535029718-17259-1-git-send-email-jianchao.w.wang@oracle.com> (raw)
2887e41 (blk-wbt: Avoid lock contention and thundering herd
issue in wbt_wait) introduces two cases that could miss wakeup:
- __wbt_done only wakes up one waiter one time. There could be
multiple waiters and (limit - inflight) > 1 at the moment.
- When the waiter is waked up, it is still on wait queue and set
to TASK_UNINTERRUPTIBLE immediately, so this waiter could be
waked up one more time. If a __wbt_done comes and wakes up
again, the prevous waiter may waste a wakeup.
To fix them and avoid to introduce too much lock contention, we
introduce our own wake up func wbt_wake_function in __wbt_wait and
use wake_up_all in __wbt_done. wbt_wake_function will try to get
wbt budget firstly, if sucesses, wake up the process, otherwise,
return -1 to interrupt the wake up loop.
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Fixes: 2887e41 (blk-wbt: Avoid lock contention and thundering herd issue in wbt_wait)
Cc: Anchal Agarwal <anchalag@amazon.com>
Cc: Frank van der Linden <fllinden@amazon.com>
---
block/blk-wbt.c | 78 +++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 54 insertions(+), 24 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index c9358f1..2667590 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -166,7 +166,7 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
int diff = limit - inflight;
if (!inflight || diff >= rwb->wb_background / 2)
- wake_up(&rqw->wait);
+ wake_up_all(&rqw->wait);
}
}
@@ -481,6 +481,40 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}
+struct wbt_wait_data {
+ struct task_struct *curr;
+ struct rq_wb *rwb;
+ struct rq_wait *rqw;
+ unsigned long rw;
+};
+
+static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
+ int wake_flags, void *key)
+{
+ struct wbt_wait_data *data = curr->private;
+
+ /*
+ * If fail to get budget, return -1 to interrupt the wake up
+ * loop in __wake_up_common.
+ */
+ if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
+ return -1;
+
+ wake_up_process(data->curr);
+
+ list_del_init(&curr->entry);
+ return 1;
+}
+
+static inline void wbt_init_wait(struct wait_queue_entry *wait,
+ struct wbt_wait_data *data)
+{
+ INIT_LIST_HEAD(&wait->entry);
+ wait->flags = 0;
+ wait->func = wbt_wake_function;
+ wait->private = data;
+}
+
/*
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
@@ -491,31 +525,27 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
__acquires(lock)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
- DECLARE_WAITQUEUE(wait, current);
- bool has_sleeper;
-
- has_sleeper = wq_has_sleeper(&rqw->wait);
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
+ struct wait_queue_entry wait;
+ struct wbt_wait_data data = {
+ .curr = current,
+ .rwb = rwb,
+ .rqw = rqw,
+ .rw = rw,
+ };
+
+ if (!wq_has_sleeper(&rqw->wait) &&
+ rq_wait_inc_below(rqw, get_limit(rwb, rw)))
return;
- add_wait_queue_exclusive(&rqw->wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
-
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
- break;
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else
- io_schedule();
- has_sleeper = false;
- } while (1);
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&rqw->wait, &wait);
+ wbt_init_wait(&wait, &data);
+ prepare_to_wait_exclusive(&rqw->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (lock) {
+ spin_unlock_irq(lock);
+ io_schedule();
+ spin_lock_irq(lock);
+ } else
+ io_schedule();
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
--
2.7.4
next reply other threads:[~2018-08-23 13:08 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-23 13:08 Jianchao Wang [this message]
2018-08-23 15:37 ` [PATCH] blk-wbt: get back the missed wakeup from __wbt_done Jens Axboe
2018-08-23 16:24 ` van der Linden, Frank
[not found] ` <20180823210144.GB5624@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-23 23:03 ` Jens Axboe
2018-08-23 23:14 ` Jens Axboe
2018-08-24 5:55 ` jianchao.wang
2018-08-24 16:40 ` van der Linden, Frank
2018-08-24 16:44 ` Jens Axboe
[not found] ` <20180824181223.GA9049@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-24 18:50 ` Jens Axboe
[not found] ` <20180824203305.GA4690@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-24 20:41 ` Jens Axboe
2018-08-25 15:41 ` Jens Axboe
2018-08-27 3:52 ` jianchao.wang
2018-08-27 6:15 ` jianchao.wang
2018-08-27 14:51 ` Jens Axboe
2018-08-28 2:52 ` jianchao.wang
2018-08-27 15:37 ` Jens Axboe
2018-08-23 15:42 ` Jens Axboe
2018-08-24 2:06 ` jianchao.wang
2018-08-24 14:40 ` Jens Axboe
2018-08-24 14:58 ` Jens Axboe
2018-08-24 17:14 ` Eduardo Valentin
2018-08-24 17:17 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1535029718-17259-1-git-send-email-jianchao.w.wang@oracle.com \
--to=jianchao.w.wang@oracle.com \
--cc=axboe@kernel.dk \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).