linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jianchao Wang <jianchao.w.wang@oracle.com>
To: axboe@kernel.dk
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] blk-wbt: get back the missed wakeup from __wbt_done
Date: Thu, 23 Aug 2018 21:08:38 +0800	[thread overview]
Message-ID: <1535029718-17259-1-git-send-email-jianchao.w.wang@oracle.com> (raw)

Commit 2887e41 (blk-wbt: Avoid lock contention and thundering herd
issue in wbt_wait) introduced two cases where a wakeup could be
missed:
 - __wbt_done only wakes up one waiter at a time. There could be
   multiple waiters and (limit - inflight) > 1 at that moment.

 - When a waiter is woken up, it is still on the wait queue and is
   set to TASK_UNINTERRUPTIBLE immediately, so this waiter could be
   woken up one more time. If another __wbt_done comes and issues a
   wakeup, the previous waiter may consume it, wasting a wakeup.

To fix them without introducing too much lock contention, we
introduce our own wake-up function, wbt_wake_function, in __wbt_wait
and use wake_up_all in __wbt_done. wbt_wake_function first tries to
get wbt budget; if it succeeds, it wakes up the process, otherwise
it returns -1 to interrupt the wake-up loop.

Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Fixes: 2887e41 (blk-wbt: Avoid lock contention and thundering herd issue in wbt_wait)
Cc: Anchal Agarwal <anchalag@amazon.com>
Cc: Frank van der Linden <fllinden@amazon.com>
---
 block/blk-wbt.c | 78 +++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 24 deletions(-)

diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index c9358f1..2667590 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -166,7 +166,7 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
 		int diff = limit - inflight;
 
 		if (!inflight || diff >= rwb->wb_background / 2)
-			wake_up(&rqw->wait);
+			wake_up_all(&rqw->wait);
 	}
 }
 
@@ -481,6 +481,40 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	return limit;
 }
 
+struct wbt_wait_data {
+	struct task_struct *curr;
+	struct rq_wb *rwb;
+	struct rq_wait *rqw;
+	unsigned long rw;
+};
+
+static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
+		int wake_flags, void *key)
+{
+	struct wbt_wait_data *data = curr->private;
+
+	/*
+	 * If we fail to get budget, return -1 to interrupt the wake up
+	 * loop in __wake_up_common.
+	 */
+	if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
+		return -1;
+
+	wake_up_process(data->curr);
+
+	list_del_init(&curr->entry);
+	return 1;
+}
+
+static inline void wbt_init_wait(struct wait_queue_entry *wait,
+		struct wbt_wait_data *data)
+{
+	INIT_LIST_HEAD(&wait->entry);
+	wait->flags = 0;
+	wait->func = wbt_wake_function;
+	wait->private = data;
+}
+
 /*
  * Block if we will exceed our limit, or if we are currently waiting for
  * the timer to kick off queuing again.
@@ -491,31 +525,27 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
 	__acquires(lock)
 {
 	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
-	DECLARE_WAITQUEUE(wait, current);
-	bool has_sleeper;
-
-	has_sleeper = wq_has_sleeper(&rqw->wait);
-	if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
+	struct wait_queue_entry wait;
+	struct wbt_wait_data data = {
+		.curr = current,
+		.rwb = rwb,
+		.rqw = rqw,
+		.rw = rw,
+	};
+
+	if (!wq_has_sleeper(&rqw->wait) &&
+			rq_wait_inc_below(rqw, get_limit(rwb, rw)))
 		return;
 
-	add_wait_queue_exclusive(&rqw->wait, &wait);
-	do {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-
-		if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
-			break;
-
-		if (lock) {
-			spin_unlock_irq(lock);
-			io_schedule();
-			spin_lock_irq(lock);
-		} else
-			io_schedule();
-		has_sleeper = false;
-	} while (1);
-
-	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(&rqw->wait, &wait);
+	wbt_init_wait(&wait, &data);
+	prepare_to_wait_exclusive(&rqw->wait, &wait,
+			TASK_UNINTERRUPTIBLE);
+	if (lock) {
+		spin_unlock_irq(lock);
+		io_schedule();
+		spin_lock_irq(lock);
+	} else
+		io_schedule();
 }
 
 static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
-- 
2.7.4


             reply	other threads:[~2018-08-23 13:08 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-23 13:08 Jianchao Wang [this message]
2018-08-23 15:37 ` [PATCH] blk-wbt: get back the missed wakeup from __wbt_done Jens Axboe
2018-08-23 16:24   ` van der Linden, Frank
     [not found]     ` <20180823210144.GB5624@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-23 23:03       ` Jens Axboe
2018-08-23 23:14         ` Jens Axboe
2018-08-24  5:55           ` jianchao.wang
2018-08-24 16:40             ` van der Linden, Frank
2018-08-24 16:44               ` Jens Axboe
     [not found]         ` <20180824181223.GA9049@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-24 18:50           ` Jens Axboe
     [not found]             ` <20180824203305.GA4690@kaos-source-ops-60001.pdx1.amazon.com>
2018-08-24 20:41               ` Jens Axboe
2018-08-25 15:41                 ` Jens Axboe
2018-08-27  3:52                   ` jianchao.wang
2018-08-27  6:15                     ` jianchao.wang
2018-08-27 14:51                       ` Jens Axboe
2018-08-28  2:52                         ` jianchao.wang
2018-08-27 15:37                     ` Jens Axboe
2018-08-23 15:42 ` Jens Axboe
2018-08-24  2:06   ` jianchao.wang
2018-08-24 14:40     ` Jens Axboe
2018-08-24 14:58       ` Jens Axboe
2018-08-24 17:14         ` Eduardo Valentin
2018-08-24 17:17           ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1535029718-17259-1-git-send-email-jianchao.w.wang@oracle.com \
    --to=jianchao.w.wang@oracle.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).