linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] iolatency bug fix
@ 2019-01-25  0:12 Liu Bo
  2019-01-25  0:12 ` [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter Liu Bo
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: Liu Bo @ 2019-01-25  0:12 UTC (permalink / raw)
  To: linux-block; +Cc: Josef Bacik, Jens Axboe

This set is to fix a hang case if blk-iolatency is in use.

The 1st patch explains the hang problem with more details and provides
a fix.
The 2nd patch adds a warning for unexpected inflight counter.
The 3rd patch is a cleanup of blk_mq_freeze_queue.

v2: reorder the actual fix to the first patch.

Liu Bo (3):
  blk-iolatency: fix IO hang due to negative inflight counter
  Blk-iolatency: warn on negative inflight IO counter
  blk-mq: remove duplicated definition of blk_mq_freeze_queue

 block/blk-iolatency.c | 56 ++++++++++++++++++++++++++++++++++++-------
 block/blk-mq.h        |  1 -
 2 files changed, 48 insertions(+), 9 deletions(-)

-- 
2.20.1.2.gb21ebb6


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter
  2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
@ 2019-01-25  0:12 ` Liu Bo
  2019-01-25  8:30   ` Johannes Thumshirn
  2019-01-25  0:12 ` [PATCH v2 2/3] Blk-iolatency: warn on negative inflight IO counter Liu Bo
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 8+ messages in thread
From: Liu Bo @ 2019-01-25  0:12 UTC (permalink / raw)
  To: linux-block; +Cc: Josef Bacik, Jens Axboe

Our test reported the following stack, and vmcore showed that
->inflight counter is -1.

[ffffc9003fcc38d0] __schedule at ffffffff8173d95d
[ffffc9003fcc3958] schedule at ffffffff8173de26
[ffffc9003fcc3970] io_schedule at ffffffff810bb6b6
[ffffc9003fcc3988] blkcg_iolatency_throttle at ffffffff813911cb
[ffffc9003fcc3a20] rq_qos_throttle at ffffffff813847f3
[ffffc9003fcc3a48] blk_mq_make_request at ffffffff8137468a
[ffffc9003fcc3b08] generic_make_request at ffffffff81368b49
[ffffc9003fcc3b68] submit_bio at ffffffff81368d7d
[ffffc9003fcc3bb8] ext4_io_submit at ffffffffa031be00 [ext4]
[ffffc9003fcc3c00] ext4_writepages at ffffffffa03163de [ext4]
[ffffc9003fcc3d68] do_writepages at ffffffff811c49ae
[ffffc9003fcc3d78] __filemap_fdatawrite_range at ffffffff811b6188
[ffffc9003fcc3e30] filemap_write_and_wait_range at ffffffff811b6301
[ffffc9003fcc3e60] ext4_sync_file at ffffffffa030cee8 [ext4]
[ffffc9003fcc3ea8] vfs_fsync_range at ffffffff8128594b
[ffffc9003fcc3ee8] do_fsync at ffffffff81285abd
[ffffc9003fcc3f18] sys_fsync at ffffffff81285d50
[ffffc9003fcc3f28] do_syscall_64 at ffffffff81003c04
[ffffc9003fcc3f50] entry_SYSCALL_64_after_swapgs at ffffffff81742b8e

The ->inflight counter may be negative (-1) if

1) blk-iolatency was disabled when the IO was issued,

2) blk-iolatency was enabled before this IO reached its endio,

3) the ->inflight counter is decreased from 0 to -1 in endio()

In fact the hang can be easily reproduced by the below script,

H=/sys/fs/cgroup/unified/
P=/sys/fs/cgroup/unified/test

echo "+io" > $H/cgroup.subtree_control
mkdir -p $P

echo $$ > $P/cgroup.procs

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency

xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

This fixes the problem by freezing the queue so that while
enabling/disabling iolatency, there is no inflight rq running.

Note that quiesce_queue is not needed as this only updating iolatency
configuration about which dispatching request_queue doesn't care.

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 block/blk-iolatency.c | 52 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index fc714ef402a6..1893686a9c1f 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -72,6 +72,7 @@
 #include <linux/sched/loadavg.h>
 #include <linux/sched/signal.h>
 #include <trace/events/block.h>
+#include <linux/blk-mq.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 
@@ -601,6 +602,9 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 		return;
 
 	enabled = blk_iolatency_enabled(iolat->blkiolat);
+	if (!enabled)
+		return;
+
 	while (blkg && blkg->parent) {
 		iolat = blkg_to_lat(blkg);
 		if (!iolat) {
@@ -610,7 +614,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 		rqw = &iolat->rq_wait;
 
 		atomic_dec(&rqw->inflight);
-		if (!enabled || iolat->min_lat_nsec == 0)
+		if (iolat->min_lat_nsec == 0)
 			goto next;
 		iolatency_record_time(iolat, &bio->bi_issue, now,
 				      issue_as_root);
@@ -754,10 +758,13 @@ int blk_iolatency_init(struct request_queue *q)
 	return 0;
 }
 
-static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+/*
+ * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise
+ * return 0.
+ */
+static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 {
 	struct iolatency_grp *iolat = blkg_to_lat(blkg);
-	struct blk_iolatency *blkiolat = iolat->blkiolat;
 	u64 oldval = iolat->min_lat_nsec;
 
 	iolat->min_lat_nsec = val;
@@ -766,9 +773,10 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
 				    BLKIOLATENCY_MAX_WIN_SIZE);
 
 	if (!oldval && val)
-		atomic_inc(&blkiolat->enabled);
+		return 1;
 	if (oldval && !val)
-		atomic_dec(&blkiolat->enabled);
+		return -1;
+	return 0;
 }
 
 static void iolatency_clear_scaling(struct blkcg_gq *blkg)
@@ -800,6 +808,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	u64 lat_val = 0;
 	u64 oldval;
 	int ret;
+	int enable = 0;
 
 	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
 	if (ret)
@@ -834,7 +843,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	blkg = ctx.blkg;
 	oldval = iolat->min_lat_nsec;
 
-	iolatency_set_min_lat_nsec(blkg, lat_val);
+	enable = iolatency_set_min_lat_nsec(blkg, lat_val);
+	if (enable) {
+		WARN_ON_ONCE(!blk_get_queue(blkg->q));
+		blkg_get(blkg);
+	}
+
 	if (oldval != iolat->min_lat_nsec) {
 		iolatency_clear_scaling(blkg);
 	}
@@ -842,6 +856,24 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
 	ret = 0;
 out:
 	blkg_conf_finish(&ctx);
+	if (ret == 0 && enable) {
+		struct iolatency_grp *tmp = blkg_to_lat(blkg);
+		struct blk_iolatency *blkiolat = tmp->blkiolat;
+
+		blk_mq_freeze_queue(blkg->q);
+
+		if (enable == 1)
+			atomic_inc(&blkiolat->enabled);
+		else if (enable == -1)
+			atomic_dec(&blkiolat->enabled);
+		else
+			WARN_ON_ONCE(1);
+
+		blk_mq_unfreeze_queue(blkg->q);
+
+		blkg_put(blkg);
+		blk_put_queue(blkg->q);
+	}
 	return ret ?: nbytes;
 }
 
@@ -977,8 +1009,14 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd)
 {
 	struct iolatency_grp *iolat = pd_to_lat(pd);
 	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct blk_iolatency *blkiolat = iolat->blkiolat;
+	int ret;
 
-	iolatency_set_min_lat_nsec(blkg, 0);
+	ret = iolatency_set_min_lat_nsec(blkg, 0);
+	if (ret == 1)
+		atomic_inc(&blkiolat->enabled);
+	if (ret == -1)
+		atomic_dec(&blkiolat->enabled);
 	iolatency_clear_scaling(blkg);
 }
 
-- 
2.20.1.2.gb21ebb6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/3] Blk-iolatency: warn on negative inflight IO counter
  2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
  2019-01-25  0:12 ` [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter Liu Bo
@ 2019-01-25  0:12 ` Liu Bo
  2019-01-25  0:12 ` [PATCH v2 3/3] blk-mq: remove duplicated definition of blk_mq_freeze_queue Liu Bo
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: Liu Bo @ 2019-01-25  0:12 UTC (permalink / raw)
  To: linux-block; +Cc: Josef Bacik, Jens Axboe

This is to catch any unexpected negative value of inflight IO counter.

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 block/blk-iolatency.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 1893686a9c1f..2620baa1f699 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -592,6 +592,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 	u64 now = ktime_to_ns(ktime_get());
 	bool issue_as_root = bio_issue_as_root_blkg(bio);
 	bool enabled = false;
+	int inflight = 0;
 
 	blkg = bio->bi_blkg;
 	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
@@ -613,7 +614,8 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 		}
 		rqw = &iolat->rq_wait;
 
-		atomic_dec(&rqw->inflight);
+		inflight = atomic_dec_return(&rqw->inflight);
+		WARN_ON_ONCE(inflight < 0);
 		if (iolat->min_lat_nsec == 0)
 			goto next;
 		iolatency_record_time(iolat, &bio->bi_issue, now,
-- 
2.20.1.2.gb21ebb6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 3/3] blk-mq: remove duplicated definition of blk_mq_freeze_queue
  2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
  2019-01-25  0:12 ` [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter Liu Bo
  2019-01-25  0:12 ` [PATCH v2 2/3] Blk-iolatency: warn on negative inflight IO counter Liu Bo
@ 2019-01-25  0:12 ` Liu Bo
  2019-02-08 19:14 ` [PATCH v2 0/3] iolatency bug fix Liu Bo
  2019-02-08 19:42 ` Jens Axboe
  4 siblings, 0 replies; 8+ messages in thread
From: Liu Bo @ 2019-01-25  0:12 UTC (permalink / raw)
  To: linux-block; +Cc: Josef Bacik, Jens Axboe

As the prototype has been defined in "include/linux/blk-mq.h", the one
in "block/blk-mq.h" can be removed then.

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 block/blk-mq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-mq.h b/block/blk-mq.h
index d943d46b0785..d0b3dd54ef8d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -36,7 +36,6 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
-void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
-- 
2.20.1.2.gb21ebb6


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter
  2019-01-25  0:12 ` [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter Liu Bo
@ 2019-01-25  8:30   ` Johannes Thumshirn
  2019-01-29 23:55     ` Liu Bo
  0 siblings, 1 reply; 8+ messages in thread
From: Johannes Thumshirn @ 2019-01-25  8:30 UTC (permalink / raw)
  To: Liu Bo, linux-block; +Cc: Josef Bacik, Jens Axboe

On 25/01/2019 01:12, Liu Bo wrote:
> In fact the hang can be easily reproduced by the below script,
> 
> H=/sys/fs/cgroup/unified/
> P=/sys/fs/cgroup/unified/test
> 
> echo "+io" > $H/cgroup.subtree_control
> mkdir -p $P
> 
> echo $$ > $P/cgroup.procs
> 
> xfs_io -f -d -c "pwrite 0 4k" /dev/sdg
> 
> echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency
> 
> xfs_io -f -d -c "pwrite 0 4k" /dev/sdg

Can you please turn this into a testcase for blktests as well?

Thanks,
	Johannes
-- 
Johannes Thumshirn                            SUSE Labs Filesystems
jthumshirn@suse.de                                +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter
  2019-01-25  8:30   ` Johannes Thumshirn
@ 2019-01-29 23:55     ` Liu Bo
  0 siblings, 0 replies; 8+ messages in thread
From: Liu Bo @ 2019-01-29 23:55 UTC (permalink / raw)
  To: Johannes Thumshirn; +Cc: linux-block, Josef Bacik, Jens Axboe

On Fri, Jan 25, 2019 at 09:30:49AM +0100, Johannes Thumshirn wrote:
> On 25/01/2019 01:12, Liu Bo wrote:
> > In fact the hang can be easily reproduced by the below script,
> > 
> > H=/sys/fs/cgroup/unified/
> > P=/sys/fs/cgroup/unified/test
> > 
> > echo "+io" > $H/cgroup.subtree_control
> > mkdir -p $P
> > 
> > echo $$ > $P/cgroup.procs
> > 
> > xfs_io -f -d -c "pwrite 0 4k" /dev/sdg
> > 
> > echo "`cat /sys/block/sdg/dev` target=1000000" > $P/io.latency
> > 
> > xfs_io -f -d -c "pwrite 0 4k" /dev/sdg
> 
> Can you please turn this into a testcase for blktests as well?
>

Sure, please check out the patch.
https://patchwork.kernel.org/patch/10787361/

thanks,
-liubo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/3] iolatency bug fix
  2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
                   ` (2 preceding siblings ...)
  2019-01-25  0:12 ` [PATCH v2 3/3] blk-mq: remove duplicated definition of blk_mq_freeze_queue Liu Bo
@ 2019-02-08 19:14 ` Liu Bo
  2019-02-08 19:42 ` Jens Axboe
  4 siblings, 0 replies; 8+ messages in thread
From: Liu Bo @ 2019-02-08 19:14 UTC (permalink / raw)
  To: Liu Bo; +Cc: linux-block, Josef Bacik, Jens Axboe

Ping?

thanks,
liubo

On Thu, Jan 24, 2019 at 4:14 PM Liu Bo <bo.liu@linux.alibaba.com> wrote:
>
> This set is to fix a hang case if blk-iolatency is in use.
>
> The 1st patch explains the hang problem with more details and provides
> a fix.
> The 2nd patch adds a warning for unexpected inflight counter.
> The 3rd patch is a cleanup of blk_mq_freeze_queue.
>
> v2: reorder the actual fix to the first patch.
>
> Liu Bo (3):
>   blk-iolatency: fix IO hang due to negative inflight counter
>   Blk-iolatency: warn on negative inflight IO counter
>   blk-mq: remove duplicated definition of blk_mq_freeze_queue
>
>  block/blk-iolatency.c | 56 ++++++++++++++++++++++++++++++++++++-------
>  block/blk-mq.h        |  1 -
>  2 files changed, 48 insertions(+), 9 deletions(-)
>
> --
> 2.20.1.2.gb21ebb6
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/3] iolatency bug fix
  2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
                   ` (3 preceding siblings ...)
  2019-02-08 19:14 ` [PATCH v2 0/3] iolatency bug fix Liu Bo
@ 2019-02-08 19:42 ` Jens Axboe
  4 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2019-02-08 19:42 UTC (permalink / raw)
  To: Liu Bo, linux-block; +Cc: Josef Bacik

On 1/24/19 5:12 PM, Liu Bo wrote:
> This set is to fix a hang case if blk-iolatency is in use.
> 
> The 1st patch explains the hang problem with more details and provides
> a fix.
> The 2nd patch adds a warning for unexpected inflight counter.
> The 3rd patch is a cleanup of blk_mq_freeze_queue.

Applied for 5.0, thanks.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2019-02-08 19:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-25  0:12 [PATCH v2 0/3] iolatency bug fix Liu Bo
2019-01-25  0:12 ` [PATCH v2 1/3] blk-iolatency: fix IO hang due to negative inflight counter Liu Bo
2019-01-25  8:30   ` Johannes Thumshirn
2019-01-29 23:55     ` Liu Bo
2019-01-25  0:12 ` [PATCH v2 2/3] Blk-iolatency: warn on negative inflight IO counter Liu Bo
2019-01-25  0:12 ` [PATCH v2 3/3] blk-mq: remove duplicated definition of blk_mq_freeze_queue Liu Bo
2019-02-08 19:14 ` [PATCH v2 0/3] iolatency bug fix Liu Bo
2019-02-08 19:42 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).