From: Yu Kuai <yukuai1@huaweicloud.com>
To: axboe@kernel.dk, agk@redhat.com, snitzer@kernel.org,
dm-devel@redhat.com, kbusch@kernel.org, hch@lst.de,
sagi@grimberg.me, guz.fnst@cn.fujitsu.com
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-nvme@lists.infradead.org, yukuai3@huawei.com,
yukuai1@huaweicloud.com, yi.zhang@huawei.com,
yangerkun@huawei.com
Subject: [PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device
Date: Thu, 23 Feb 2023 17:12:26 +0800 [thread overview]
Message-ID: <20230223091226.1135678-1-yukuai1@huaweicloud.com> (raw)
From: Yu Kuai <yukuai3@huawei.com>
While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.
Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.
Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-core.c | 16 ++++++----------
drivers/md/dm.c | 6 +++---
drivers/nvme/host/multipath.c | 8 ++++----
include/linux/blkdev.h | 5 ++---
4 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 82b5b2c53f1e..fe1d320f5f07 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
}
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
- const int sgrp = op_stat_group(op);
-
part_stat_lock();
update_io_ticks(bdev, start_time, false);
- part_stat_inc(bdev, ios[sgrp]);
- part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time)
+ unsigned int sectors, unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
part_stat_lock();
update_io_ticks(bdev, now, true);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
{
- bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d45..f5cc330bb549 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
sectors = io->sectors;
if (!end)
- bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
- start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
else
- bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+ start_time);
if (static_branch_unlikely(&stats_enabled) &&
unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63..9171452e2f6d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
return;
nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
- nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
- blk_rq_bytes(rq) >> SECTOR_SHIFT,
- req_op(rq), jiffies);
+ nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+ jiffies);
}
EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
- nvme_req(rq)->start_time);
+ blk_rq_bytes(rq) >> SECTOR_SHIFT,
+ nvme_req(rq)->start_time);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c18..941304f17492 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time);
+ unsigned int sectors, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
--
2.31.1
WARNING: multiple messages have this Message-ID (diff)
From: Yu Kuai <yukuai1@huaweicloud.com>
To: axboe@kernel.dk, agk@redhat.com, snitzer@kernel.org,
dm-devel@redhat.com, kbusch@kernel.org, hch@lst.de,
sagi@grimberg.me, guz.fnst@cn.fujitsu.com
Cc: yi.zhang@huawei.com, yangerkun@huawei.com,
linux-kernel@vger.kernel.org, linux-nvme@lists.infradead.org,
linux-block@vger.kernel.org, yukuai1@huaweicloud.com,
yukuai3@huawei.com
Subject: [dm-devel] [PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device
Date: Thu, 23 Feb 2023 17:12:26 +0800 [thread overview]
Message-ID: <20230223091226.1135678-1-yukuai1@huaweicloud.com> (raw)
From: Yu Kuai <yukuai3@huawei.com>
While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.
Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.
Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-core.c | 16 ++++++----------
drivers/md/dm.c | 6 +++---
drivers/nvme/host/multipath.c | 8 ++++----
include/linux/blkdev.h | 5 ++---
4 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 82b5b2c53f1e..fe1d320f5f07 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
}
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
- const int sgrp = op_stat_group(op);
-
part_stat_lock();
update_io_ticks(bdev, start_time, false);
- part_stat_inc(bdev, ios[sgrp]);
- part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time)
+ unsigned int sectors, unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
part_stat_lock();
update_io_ticks(bdev, now, true);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
{
- bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d45..f5cc330bb549 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
sectors = io->sectors;
if (!end)
- bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
- start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
else
- bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+ start_time);
if (static_branch_unlikely(&stats_enabled) &&
unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63..9171452e2f6d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
return;
nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
- nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
- blk_rq_bytes(rq) >> SECTOR_SHIFT,
- req_op(rq), jiffies);
+ nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+ jiffies);
}
EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
- nvme_req(rq)->start_time);
+ blk_rq_bytes(rq) >> SECTOR_SHIFT,
+ nvme_req(rq)->start_time);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c18..941304f17492 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time);
+ unsigned int sectors, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
--
2.31.1
--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
next reply other threads:[~2023-02-23 8:49 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-23 9:12 Yu Kuai [this message]
2023-02-23 9:12 ` [dm-devel] [PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device Yu Kuai
2023-02-28 1:01 ` Yu Kuai
2023-02-28 1:01 ` [dm-devel] " Yu Kuai
2023-03-07 10:44 ` Yu Kuai
2023-03-07 10:44 ` [dm-devel] " Yu Kuai
2023-03-15 15:10 ` Christoph Hellwig
2023-03-15 15:10 ` [dm-devel] " Christoph Hellwig
2023-03-15 15:25 ` Jens Axboe
2023-03-15 15:25 ` [dm-devel] " Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230223091226.1135678-1-yukuai1@huaweicloud.com \
--to=yukuai1@huaweicloud.com \
--cc=agk@redhat.com \
--cc=axboe@kernel.dk \
--cc=dm-devel@redhat.com \
--cc=guz.fnst@cn.fujitsu.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
--cc=snitzer@kernel.org \
--cc=yangerkun@huawei.com \
--cc=yi.zhang@huawei.com \
--cc=yukuai3@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.