From: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
To: linux-block@vger.kernel.org
Cc: axboe@kernel.dk, jack@suse.cz
Subject: Re: [PATCH] blk-throttle: limit bios to fix amount of pages entering writeback prematurely
Date: Thu, 31 Jan 2019 10:03:34 +0800 [thread overview]
Message-ID: <9d722c67-dfae-2ee4-74ef-504164fed0bf@linux.alibaba.com> (raw)
In-Reply-To: <20181228115148.26474-1-xiaoguang.wang@linux.alibaba.com>
hi,
> Currently in blk_throtl_bio(), if one bio exceeds its throtl_grp's bps
> or iops limit, this bio will be queued throtl_grp's throtl_service_queue,
> then obviously mm subsys will submit more pages, even underlying device
> can not handle these io requests, also this will make large amount of pages
> entering writeback prematurely, later if some process writes some of these
> pages, it will wait for long time.
>
> I have done some tests: one process does buffered writes on a 1GB file,
> and make this process's blkcg max bps limit be 10MB/s, I observe this:
> #cat /proc/meminfo | grep -i back
> Writeback: 900024 kB
> WritebackTmp: 0 kB
>
> I think this Writeback value is just too big, indeed many bios have been
> queued in throtl_grp's throtl_service_queue, if one process try to write
> the last bio's page in this queue, it will call wait_on_page_writeback(page),
> which must wait the previous bios to finish and will take long time, we
> have also see 120s hung task warning in our server.
>
> To fix this issue, we can simply limit throtl_service_queue's max queued
> bios, currently we limit it to throtl_grp's bps_limit or iops limit, if it
> still exteeds, we just sleep for a while.
Ping :)
The fix method in this patch is not good, I had written a new patch that
uses wait queue, but do you think this is a blk-throttle design issue and
needs fixing? thanks.
Regards,
Xiaoguang Wang
>
> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
> ---
> block/blk-throttle.c | 15 ++++++++++++++-
> include/linux/blk-cgroup.h | 15 ++++++++++++---
> 2 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index 627c40f89827..59fe58c3c411 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -81,6 +81,7 @@ struct throtl_service_queue {
> */
> struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */
> unsigned int nr_queued[2]; /* number of queued bios */
> + unsigned int nr_queued_bytes[2]; /* number of queued bytes */
>
> /*
> * RB tree of active children throtl_grp's, which are sorted by
> @@ -1251,6 +1252,7 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
> throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
>
> sq->nr_queued[rw]++;
> + sq->nr_queued_bytes[rw] += throtl_bio_data_size(bio);
> blkg_rwstat_add(&tg->total_bytes_queued, bio_op(bio), bio->bi_opf,
> throtl_bio_data_size(bio));
> blkg_rwstat_add(&tg->total_io_queued, bio_op(bio), bio->bi_opf, 1);
> @@ -1307,6 +1309,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
> */
> bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
> sq->nr_queued[rw]--;
> + sq->nr_queued_bytes[rw] -= throtl_bio_data_size(bio);
>
> throtl_charge_bio(tg, bio);
>
> @@ -2563,7 +2566,7 @@ static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
> }
>
> bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> - struct bio *bio)
> + struct bio *bio, unsigned long *wait)
> {
> struct throtl_qnode *qn = NULL;
> struct throtl_grp *orig_tg = blkg_to_tg(blkg ?: q->root_blkg);
> @@ -2572,6 +2575,8 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> bool rw = bio_data_dir(bio);
> bool throttled = false;
> struct throtl_data *td = tg->td;
> + u64 bps_limit;
> + unsigned int iops_limit;
>
> WARN_ON_ONCE(!rcu_read_lock_held());
>
> @@ -2654,6 +2659,14 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> sq->nr_queued[READ], sq->nr_queued[WRITE]);
>
> td->nr_queued[rw]++;
> + bps_limit = tg_bps_limit(tg, rw);
> + if (bps_limit != U64_MAX && sq->nr_queued_bytes[rw] > bps_limit)
> + *wait = HZ * 90 / 100;
> +
> + iops_limit = tg_iops_limit(tg, rw);
> + if (iops_limit != UINT_MAX && sq->nr_queued[rw] > iops_limit)
> + *wait = HZ * 90 / 100;
> +
> throtl_add_bio_tg(bio, qn, tg);
> throttled = true;
>
> diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
> index 2634f3696ac2..dc2174330066 100644
> --- a/include/linux/blk-cgroup.h
> +++ b/include/linux/blk-cgroup.h
> @@ -721,10 +721,13 @@ static inline void blkg_update_meta_stats(struct blkcg_gq *blkg)
>
> #ifdef CONFIG_BLK_DEV_THROTTLING
> extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> - struct bio *bio);
> + struct bio *bio, unsigned long *wait);
> #else
> static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
> - struct bio *bio) { return false; }
> + struct bio *bio, unsigned long *wait)
> +{
> + return false;
> +}
> #endif
>
> static inline bool blkcg_bio_issue_check(struct request_queue *q,
> @@ -733,6 +736,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
> struct blkcg *blkcg;
> struct blkcg_gq *blkg;
> bool throtl = false;
> + unsigned long wait = 0;
>
> rcu_read_lock();
> blkcg = bio_blkcg(bio);
> @@ -742,7 +746,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
> if (unlikely(IS_ERR(blkg)))
> blkg = NULL;
>
> - throtl = blk_throtl_bio(q, blkg, bio);
> + throtl = blk_throtl_bio(q, blkg, bio, &wait);
> spin_unlock_irq(q->queue_lock);
>
> if (!throtl && !bio_flagged(bio, BIO_THROTL_COUNTED)) {
> @@ -754,6 +758,11 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
> }
>
> rcu_read_unlock();
> + if (wait > 0) {
> + __set_current_state(TASK_KILLABLE);
> + io_schedule_timeout(wait);
> + }
> +
> return !throtl;
> }
>
>
next prev parent reply other threads:[~2019-01-31 2:03 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-28 11:51 [PATCH] blk-throttle: limit bios to fix amount of pages entering writeback prematurely Xiaoguang Wang
2019-01-31 2:03 ` Xiaoguang Wang [this message]
2019-01-31 9:26 ` Jan Kara
2019-01-31 13:48 ` Jens Axboe
2019-01-31 21:20 ` Liu Bo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9d722c67-dfae-2ee4-74ef-504164fed0bf@linux.alibaba.com \
--to=xiaoguang.wang@linux.alibaba.com \
--cc=axboe@kernel.dk \
--cc=jack@suse.cz \
--cc=linux-block@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).