* [PATCH] md: improve io stats accounting
@ 2020-06-01 16:12 Artur Paszkiewicz
2020-06-01 22:03 ` kbuild test robot
` (3 more replies)
0 siblings, 4 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2020-06-01 16:12 UTC (permalink / raw)
To: song; +Cc: linux-raid, Artur Paszkiewicz
Use generic io accounting functions to manage io stats. There was an
attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
io stats accounting functions to simplify io stat accounting"), but it
did not include a call to generic_end_io_acct() and caused issues with
tracking in-flight IOs, so it was later removed in commit
74672d069b298b03 ("md: fix md io stats accounting broken").
This patch attempts to fix this by using both generic_start_io_acct()
and generic_end_io_acct(). To make it possible, in md_make_request() a
bio is cloned with additional data - struct md_io, which includes the io
start_time. A new bioset is introduced for this purpose. We call
generic_start_io_acct() and pass the clone instead of the original to
md_handle_request(). When it completes, we call generic_end_io_acct()
and complete the original bio.
This adds correct statistics about in-flight IOs and IO processing time,
interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
It also fixes a situation where too many IOs were reported if a bio was
re-submitted to the mddev, because io accounting is now performed only
on newly arriving bios.
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
drivers/md/md.h | 1 +
2 files changed, 53 insertions(+), 13 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529..5a9f167ef5b9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
}
EXPORT_SYMBOL(md_handle_request);
+struct md_io {
+ struct mddev *mddev;
+ struct bio *orig_bio;
+ unsigned long start_time;
+ struct bio orig_bio_clone;
+};
+
+static void md_end_request(struct bio *bio)
+{
+ struct md_io *md_io = bio->bi_private;
+ struct mddev *mddev = md_io->mddev;
+ struct bio *orig_bio = md_io->orig_bio;
+
+ orig_bio->bi_status = bio->bi_status;
+
+ generic_end_io_acct(mddev->queue, bio_op(orig_bio),
+ &mddev->gendisk->part0, md_io->start_time);
+ bio_put(bio);
+
+ bio_endio(orig_bio);
+}
+
static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
- const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = bio->bi_disk->private_data;
- unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
bio_io_error(bio);
@@ -488,21 +508,30 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- /*
- * save the sectors now since our bio can
- * go away inside make_request
- */
- sectors = bio_sectors(bio);
+ if (bio->bi_pool != &mddev->md_io_bs) {
+ struct bio *clone;
+ struct md_io *md_io;
+
+ clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
+
+ md_io = container_of(clone, struct md_io, orig_bio_clone);
+ md_io->mddev = mddev;
+ md_io->orig_bio = bio;
+ md_io->start_time = jiffies;
+
+ clone->bi_end_io = md_end_request;
+ clone->bi_private = md_io;
+ bio = clone;
+
+ generic_start_io_acct(mddev->queue, bio_op(bio),
+ bio_sectors(bio), &mddev->gendisk->part0);
+ }
+
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
md_handle_request(mddev, bio);
- part_stat_lock();
- part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
- part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
- part_stat_unlock();
-
return BLK_QC_T_NONE;
}
@@ -2338,7 +2367,8 @@ int md_integrity_register(struct mddev *mddev)
bdev_get_integrity(reference->bdev));
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
- if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+ if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+ bioset_integrity_create(&mddev->md_io_bs, BIO_POOL_SIZE)) {
pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
@@ -5545,6 +5575,7 @@ static void md_free(struct kobject *ko)
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
+ bioset_exit(&mddev->md_io_bs);
kfree(mddev);
}
@@ -5838,6 +5869,12 @@ int md_run(struct mddev *mddev)
if (err)
return err;
}
+ if (!bioset_initialized(&mddev->md_io_bs)) {
+ err = bioset_init(&mddev->md_io_bs, BIO_POOL_SIZE,
+ offsetof(struct md_io, orig_bio_clone), 0);
+ if (err)
+ return err;
+ }
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
@@ -6015,6 +6052,7 @@ int md_run(struct mddev *mddev)
abort:
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
+ bioset_exit(&mddev->md_io_bs);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -6239,6 +6277,7 @@ void md_stop(struct mddev *mddev)
__md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
+ bioset_exit(&mddev->md_io_bs);
}
EXPORT_SYMBOL_GPL(md_stop);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 612814d07d35..74273728b898 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -481,6 +481,7 @@ struct mddev {
struct bio_set sync_set; /* for sync operations like
* metadata and bitmap writes
*/
+ struct bio_set md_io_bs;
/* Generic flush handling.
* The last to finish preflush schedules a worker to submit
--
2.26.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
@ 2020-06-01 22:03 ` kbuild test robot
2020-06-02 6:22 ` Song Liu
2020-06-02 6:48 ` Song Liu
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: kbuild test robot @ 2020-06-01 22:03 UTC (permalink / raw)
To: song; +Cc: kbuild-all, linux-raid, Artur Paszkiewicz
[-- Attachment #1: Type: text/plain, Size: 4612 bytes --]
Hi Artur,
I love your patch! Yet something to improve:
[auto build test ERROR on next-20200529]
[cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Artur-Paszkiewicz/md-improve-io-stats-accounting/20200602-002835
base: e7b08814b16b80a0bf76eeca16317f8c2ed23b8c
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=ia64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>, old ones prefixed by <<):
drivers/md/md.c: In function 'md_end_request':
>> drivers/md/md.c:481:2: error: implicit declaration of function 'generic_end_io_acct'; did you mean 'bio_end_io_acct'? [-Werror=implicit-function-declaration]
481 | generic_end_io_acct(mddev->queue, bio_op(orig_bio),
| ^~~~~~~~~~~~~~~~~~~
| bio_end_io_acct
drivers/md/md.c: In function 'md_make_request':
>> drivers/md/md.c:526:3: error: implicit declaration of function 'generic_start_io_acct'; did you mean 'bio_start_io_acct'? [-Werror=implicit-function-declaration]
526 | generic_start_io_acct(mddev->queue, bio_op(bio),
| ^~~~~~~~~~~~~~~~~~~~~
| bio_start_io_acct
drivers/md/md.c: In function 'bind_rdev_to_array':
drivers/md/md.c:2475:27: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
2475 | /* failure here is OK */;
| ^
drivers/md/md.c: In function 'slot_store':
drivers/md/md.c:3236:28: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
3236 | /* failure here is OK */;
| ^
drivers/md/md.c: In function 'remove_and_add_spares':
drivers/md/md.c:9103:29: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
9103 | /* failure here is OK */;
| ^
cc1: some warnings being treated as errors
vim +481 drivers/md/md.c
472
473 static void md_end_request(struct bio *bio)
474 {
475 struct md_io *md_io = bio->bi_private;
476 struct mddev *mddev = md_io->mddev;
477 struct bio *orig_bio = md_io->orig_bio;
478
479 orig_bio->bi_status = bio->bi_status;
480
> 481 generic_end_io_acct(mddev->queue, bio_op(orig_bio),
482 &mddev->gendisk->part0, md_io->start_time);
483 bio_put(bio);
484
485 bio_endio(orig_bio);
486 }
487
488 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
489 {
490 const int rw = bio_data_dir(bio);
491 struct mddev *mddev = bio->bi_disk->private_data;
492
493 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
494 bio_io_error(bio);
495 return BLK_QC_T_NONE;
496 }
497
498 blk_queue_split(q, &bio);
499
500 if (mddev == NULL || mddev->pers == NULL) {
501 bio_io_error(bio);
502 return BLK_QC_T_NONE;
503 }
504 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
505 if (bio_sectors(bio) != 0)
506 bio->bi_status = BLK_STS_IOERR;
507 bio_endio(bio);
508 return BLK_QC_T_NONE;
509 }
510
511 if (bio->bi_pool != &mddev->md_io_bs) {
512 struct bio *clone;
513 struct md_io *md_io;
514
515 clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
516
517 md_io = container_of(clone, struct md_io, orig_bio_clone);
518 md_io->mddev = mddev;
519 md_io->orig_bio = bio;
520 md_io->start_time = jiffies;
521
522 clone->bi_end_io = md_end_request;
523 clone->bi_private = md_io;
524 bio = clone;
525
> 526 generic_start_io_acct(mddev->queue, bio_op(bio),
527 bio_sectors(bio), &mddev->gendisk->part0);
528 }
529
530 /* bio could be mergeable after passing to underlayer */
531 bio->bi_opf &= ~REQ_NOMERGE;
532
533 md_handle_request(mddev, bio);
534
535 return BLK_QC_T_NONE;
536 }
537
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 60472 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-01 22:03 ` kbuild test robot
@ 2020-06-02 6:22 ` Song Liu
2020-06-02 6:31 ` [kbuild-all] " Rong Chen
0 siblings, 1 reply; 11+ messages in thread
From: Song Liu @ 2020-06-02 6:22 UTC (permalink / raw)
To: kbuild test robot; +Cc: Artur Paszkiewicz, kbuild-all, linux-raid
Hi kbuild test robot,
On Mon, Jun 1, 2020 at 3:03 PM kbuild test robot <lkp@intel.com> wrote:
>
> Hi Artur,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on next-20200529]
> [cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the
I am able to apply this to
https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git md-next
Please use that branch for testing.
Thanks,
Song
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [kbuild-all] Re: [PATCH] md: improve io stats accounting
2020-06-02 6:22 ` Song Liu
@ 2020-06-02 6:31 ` Rong Chen
0 siblings, 0 replies; 11+ messages in thread
From: Rong Chen @ 2020-06-02 6:31 UTC (permalink / raw)
To: Song Liu, kbuild test robot; +Cc: Artur Paszkiewicz, kbuild-all, linux-raid
On 6/2/20 2:22 PM, Song Liu wrote:
> Hi kbuild test robot,
>
> On Mon, Jun 1, 2020 at 3:03 PM kbuild test robot <lkp@intel.com> wrote:
>> Hi Artur,
>>
>> I love your patch! Yet something to improve:
>>
>> [auto build test ERROR on next-20200529]
>> [cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
>> [if your patch is applied to the wrong git tree, please drop us a note to help
>> improve the system. BTW, we also suggest to use '--base' option to specify the
> I am able to apply this to
>
> https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git md-next
>
> Please use that branch for testing.
>
> Thanks,
> Song
Hi Song,
Thanks for your advice, we'll try.
Best Regards,
Rong Chen
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
2020-06-01 22:03 ` kbuild test robot
@ 2020-06-02 6:48 ` Song Liu
2020-06-02 11:47 ` Artur Paszkiewicz
2020-06-02 7:01 ` kbuild test robot
2020-06-08 14:37 ` Guoqing Jiang
3 siblings, 1 reply; 11+ messages in thread
From: Song Liu @ 2020-06-02 6:48 UTC (permalink / raw)
To: Artur Paszkiewicz; +Cc: linux-raid
Hi Artur,
Thanks for the patch.
On Mon, Jun 1, 2020 at 9:13 AM Artur Paszkiewicz
<artur.paszkiewicz@intel.com> wrote:
>
> Use generic io accounting functions to manage io stats. There was an
[...]
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
> drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
> drivers/md/md.h | 1 +
> 2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f567f536b529..5a9f167ef5b9 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
> }
> EXPORT_SYMBOL(md_handle_request);
>
>
[...]
>
> - /*
> - * save the sectors now since our bio can
> - * go away inside make_request
> - */
> - sectors = bio_sectors(bio);
> + if (bio->bi_pool != &mddev->md_io_bs) {
> + struct bio *clone;
> + struct md_io *md_io;
> +
> + clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
Handle clone == NULL?
Also, have you done benchmarks with this change?
Song
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
2020-06-01 22:03 ` kbuild test robot
2020-06-02 6:48 ` Song Liu
@ 2020-06-02 7:01 ` kbuild test robot
2020-06-08 14:37 ` Guoqing Jiang
3 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2020-06-02 7:01 UTC (permalink / raw)
To: song; +Cc: kbuild-all, clang-built-linux, linux-raid, Artur Paszkiewicz
[-- Attachment #1: Type: text/plain, Size: 4308 bytes --]
Hi Artur,
I love your patch! Yet something to improve:
[auto build test ERROR on next-20200529]
[cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Artur-Paszkiewicz/md-improve-io-stats-accounting/20200602-002835
base: e7b08814b16b80a0bf76eeca16317f8c2ed23b8c
config: powerpc-randconfig-r005-20200602 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 2388a096e7865c043e83ece4e26654bd3d1a20d5)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc cross compiling tool for clang build
# apt-get install binutils-powerpc-linux-gnu
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>, old ones prefixed by <<):
>> drivers/md/md.c:481:2: error: implicit declaration of function 'generic_end_io_acct' [-Werror,-Wimplicit-function-declaration]
generic_end_io_acct(mddev->queue, bio_op(orig_bio),
^
drivers/md/md.c:481:2: note: did you mean 'bio_end_io_acct'?
include/linux/blkdev.h:1917:20: note: 'bio_end_io_acct' declared here
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
^
>> drivers/md/md.c:526:3: error: implicit declaration of function 'generic_start_io_acct' [-Werror,-Wimplicit-function-declaration]
generic_start_io_acct(mddev->queue, bio_op(bio),
^
drivers/md/md.c:526:3: note: did you mean 'bio_start_io_acct'?
include/linux/blkdev.h:1907:29: note: 'bio_start_io_acct' declared here
static inline unsigned long bio_start_io_acct(struct bio *bio)
^
2 errors generated.
vim +/generic_end_io_acct +481 drivers/md/md.c
472
473 static void md_end_request(struct bio *bio)
474 {
475 struct md_io *md_io = bio->bi_private;
476 struct mddev *mddev = md_io->mddev;
477 struct bio *orig_bio = md_io->orig_bio;
478
479 orig_bio->bi_status = bio->bi_status;
480
> 481 generic_end_io_acct(mddev->queue, bio_op(orig_bio),
482 &mddev->gendisk->part0, md_io->start_time);
483 bio_put(bio);
484
485 bio_endio(orig_bio);
486 }
487
488 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
489 {
490 const int rw = bio_data_dir(bio);
491 struct mddev *mddev = bio->bi_disk->private_data;
492
493 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
494 bio_io_error(bio);
495 return BLK_QC_T_NONE;
496 }
497
498 blk_queue_split(q, &bio);
499
500 if (mddev == NULL || mddev->pers == NULL) {
501 bio_io_error(bio);
502 return BLK_QC_T_NONE;
503 }
504 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
505 if (bio_sectors(bio) != 0)
506 bio->bi_status = BLK_STS_IOERR;
507 bio_endio(bio);
508 return BLK_QC_T_NONE;
509 }
510
511 if (bio->bi_pool != &mddev->md_io_bs) {
512 struct bio *clone;
513 struct md_io *md_io;
514
515 clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
516
517 md_io = container_of(clone, struct md_io, orig_bio_clone);
518 md_io->mddev = mddev;
519 md_io->orig_bio = bio;
520 md_io->start_time = jiffies;
521
522 clone->bi_end_io = md_end_request;
523 clone->bi_private = md_io;
524 bio = clone;
525
> 526 generic_start_io_acct(mddev->queue, bio_op(bio),
527 bio_sectors(bio), &mddev->gendisk->part0);
528 }
529
530 /* bio could be mergeable after passing to underlayer */
531 bio->bi_opf &= ~REQ_NOMERGE;
532
533 md_handle_request(mddev, bio);
534
535 return BLK_QC_T_NONE;
536 }
537
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29684 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-02 6:48 ` Song Liu
@ 2020-06-02 11:47 ` Artur Paszkiewicz
2020-06-02 17:16 ` Song Liu
2020-06-02 17:32 ` John Stoffel
0 siblings, 2 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2020-06-02 11:47 UTC (permalink / raw)
To: Song Liu; +Cc: linux-raid
On 6/2/20 8:48 AM, Song Liu wrote:
>> + clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
>
> Handle clone == NULL?
I think this should never fail - bio_alloc_bioset() guarantees that. It
is used in a similar manner in raid1 and raid10. How about
BUG_ON(clone == NULL)?
> Also, have you done benchmarks with this change?
I tested 4k random reads on a raid0 (4x P4510 2TB) and it was 2550k vs
2567k IOPS, that's slower only by about 0.66%:
without patch:
# fio --direct=1 --thread --rw=randread --ioengine=libaio --iodepth=64 --bs=4k --name=fio --filename=/dev/md0 --time_based --runtime=300 --numjobs=16 --group_reporting --norandommap --randrepeat=0
fio: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=64
...
fio-3.20
Starting 16 threads
Jobs: 16 (f=16): [r(16)][100.0%][r=9.81GiB/s][r=2571k IOPS][eta 00m:00s]
fio: (groupid=0, jobs=16): err= 0: pid=8678: Tue Jun 2 13:19:38 2020
read: IOPS=2567k, BW=9.79GiB/s (10.5GB/s)(2938GiB/300002msec)
slat (nsec): min=1384, max=798130, avg=2852.52, stdev=1108.15
clat (usec): min=48, max=12387, avg=395.81, stdev=260.52
lat (usec): min=51, max=12389, avg=398.70, stdev=260.51
clat percentiles (usec):
| 1.00th=[ 101], 5.00th=[ 135], 10.00th=[ 157], 20.00th=[ 196],
| 30.00th=[ 233], 40.00th=[ 273], 50.00th=[ 322], 60.00th=[ 379],
| 70.00th=[ 457], 80.00th=[ 562], 90.00th=[ 734], 95.00th=[ 889],
| 99.00th=[ 1287], 99.50th=[ 1500], 99.90th=[ 2147], 99.95th=[ 2442],
| 99.99th=[ 3130]
bw ( MiB/s): min= 9664, max=10215, per=100.00%, avg=10033.54, stdev= 4.59, samples=9584
iops : min=2474063, max=2615178, avg=2568585.96, stdev=1176.06, samples=9584
lat (usec) : 50=0.01%, 100=0.94%, 250=33.51%, 500=39.98%, 750=16.36%
lat (usec) : 1000=6.16%
lat (msec) : 2=2.92%, 4=0.14%, 10=0.01%, 20=0.01%
cpu : usr=16.07%, sys=41.29%, ctx=193003298, majf=0, minf=90976
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.1%, >=64=100.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, >=64=0.0%
issued rwts: total=770061876,0,0,0 short=0,0,0,0 dropped=0,0,0,0
latency : target=0, window=0, percentile=100.00%, depth=64
Run status group 0 (all jobs):
READ: bw=9.79GiB/s (10.5GB/s), 9.79GiB/s-9.79GiB/s (10.5GB/s-10.5GB/s), io=2938GiB (3154GB), run=300002-300002msec
Disk stats (read/write):
md0: ios=769770571/0, merge=0/0, ticks=0/0, in_queue=0, util=0.00%, aggrios=192515469/0, aggrmerge=0/0, aggrticks=74947558/0, aggrin_queue=6006362, aggrutil=100.00%
nvme3n1: ios=192511957/0, merge=0/0, ticks=76700358/0, in_queue=6215887, util=100.00%
nvme6n1: ios=192503722/0, merge=0/0, ticks=72629807/0, in_queue=5520156, util=100.00%
nvme2n1: ios=192518930/0, merge=0/0, ticks=74719743/0, in_queue=5979779, util=100.00%
nvme1n1: ios=192527267/0, merge=0/0, ticks=75740325/0, in_queue=6309628, util=100.00%
with patch:
# fio --direct=1 --thread --rw=randread --ioengine=libaio --iodepth=64 --bs=4k --name=fio --filename=/dev/md0 --time_based --runtime=300 --numjobs=16 --group_reporting --norandommap --randrepeat=0
fio: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=64
...
fio-3.20
Starting 16 threads
Jobs: 16 (f=16): [r(16)][100.0%][r=9934MiB/s][r=2543k IOPS][eta 00m:00s]
fio: (groupid=0, jobs=16): err= 0: pid=8463: Tue Jun 2 13:32:12 2020
read: IOPS=2550k, BW=9961MiB/s (10.4GB/s)(2918GiB/300002msec)
slat (nsec): min=1512, max=3578.1k, avg=5145.36, stdev=2145.71
clat (usec): min=50, max=12421, avg=396.13, stdev=210.38
lat (usec): min=52, max=12428, avg=401.33, stdev=210.45
clat percentiles (usec):
| 1.00th=[ 133], 5.00th=[ 178], 10.00th=[ 208], 20.00th=[ 247],
| 30.00th=[ 281], 40.00th=[ 314], 50.00th=[ 347], 60.00th=[ 383],
| 70.00th=[ 437], 80.00th=[ 510], 90.00th=[ 644], 95.00th=[ 783],
| 99.00th=[ 1156], 99.50th=[ 1369], 99.90th=[ 1991], 99.95th=[ 2311],
| 99.99th=[ 2999]
bw ( MiB/s): min= 9266, max=10648, per=100.00%, avg=9967.23, stdev=13.31, samples=9584
iops : min=2372118, max=2725915, avg=2551610.09, stdev=3407.18, samples=9584
lat (usec) : 100=0.13%, 250=20.62%, 500=58.25%, 750=15.25%, 1000=3.92%
lat (msec) : 2=1.72%, 4=0.10%, 10=0.01%, 20=0.01%
cpu : usr=15.97%, sys=66.59%, ctx=11235674, majf=0, minf=41238
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.1%, >=64=100.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, >=64=0.0%
issued rwts: total=764997277,0,0,0 short=0,0,0,0 dropped=0,0,0,0
latency : target=0, window=0, percentile=100.00%, depth=64
Run status group 0 (all jobs):
READ: bw=9961MiB/s (10.4GB/s), 9961MiB/s-9961MiB/s (10.4GB/s-10.4GB/s), io=2918GiB (3133GB), run=300002-300002msec
Disk stats (read/write):
md0: ios=764702549/0, merge=0/0, ticks=242091778/0, in_queue=242091754, util=100.00%, aggrios=191249319/0, aggrmerge=0/0, aggrticks=59760064/0, aggrin_queue=2798855, aggrutil=100.00%
nvme3n1: ios=191250967/0, merge=0/0, ticks=61633420/0, in_queue=3032943, util=100.00%
nvme6n1: ios=191257919/0, merge=0/0, ticks=59065688/0, in_queue=2784603, util=100.00%
nvme2n1: ios=191255129/0, merge=0/0, ticks=58520284/0, in_queue=2461116, util=100.00%
nvme1n1: ios=191233262/0, merge=0/0, ticks=59820864/0, in_queue=2916760, util=100.00%
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-02 11:47 ` Artur Paszkiewicz
@ 2020-06-02 17:16 ` Song Liu
2020-06-02 17:32 ` John Stoffel
1 sibling, 0 replies; 11+ messages in thread
From: Song Liu @ 2020-06-02 17:16 UTC (permalink / raw)
To: Artur Paszkiewicz; +Cc: linux-raid
On Tue, Jun 2, 2020 at 4:47 AM Artur Paszkiewicz
<artur.paszkiewicz@intel.com> wrote:
>
> On 6/2/20 8:48 AM, Song Liu wrote:
> >> + clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
> >
> > Handle clone == NULL?
>
> I think this should never fail - bio_alloc_bioset() guarantees that. It
> is used in a similar manner in raid1 and raid10. How about
> BUG_ON(clone == NULL)?
I misread the code. Current version is fine.
>
> > Also, have you done benchmarks with this change?
>
> I tested 4k random reads on a raid0 (4x P4510 2TB) and it was 2550k vs
> 2567k IOPS, that's slower only by about 0.66%:
Thanks for the test. I will do some more tests and process the patch
after the merge
window.
Song
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-02 11:47 ` Artur Paszkiewicz
2020-06-02 17:16 ` Song Liu
@ 2020-06-02 17:32 ` John Stoffel
1 sibling, 0 replies; 11+ messages in thread
From: John Stoffel @ 2020-06-02 17:32 UTC (permalink / raw)
To: Artur Paszkiewicz; +Cc: Song Liu, linux-raid
>>>>> "Artur" == Artur Paszkiewicz <artur.paszkiewicz@intel.com> writes:
Artur> On 6/2/20 8:48 AM, Song Liu wrote:
>>> + clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
>>
>> Handle clone == NULL?
Artur> I think this should never fail - bio_alloc_bioset() guarantees that. It
Artur> is used in a similar manner in raid1 and raid10. How about
Artur> BUG_ON(clone == NULL)?
No, use WARN_ON() instead, why would you bug the entire system for
just one logical device throwing an error?
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
` (2 preceding siblings ...)
2020-06-02 7:01 ` kbuild test robot
@ 2020-06-08 14:37 ` Guoqing Jiang
2020-07-02 6:30 ` Song Liu
3 siblings, 1 reply; 11+ messages in thread
From: Guoqing Jiang @ 2020-06-08 14:37 UTC (permalink / raw)
To: Artur Paszkiewicz, song; +Cc: linux-raid
On 6/1/20 6:12 PM, Artur Paszkiewicz wrote:
> Use generic io accounting functions to manage io stats. There was an
> attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
> io stats accounting functions to simplify io stat accounting"), but it
> did not include a call to generic_end_io_acct() and caused issues with
> tracking in-flight IOs, so it was later removed in commit
> 74672d069b298b03 ("md: fix md io stats accounting broken").
>
> This patch attempts to fix this by using both generic_start_io_acct()
> and generic_end_io_acct(). To make it possible, in md_make_request() a
> bio is cloned with additional data - struct md_io, which includes the io
> start_time. A new bioset is introduced for this purpose. We call
> generic_start_io_acct() and pass the clone instead of the original to
> md_handle_request(). When it completes, we call generic_end_io_acct()
> and complete the original bio.
>
> This adds correct statistics about in-flight IOs and IO processing time,
> interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
>
> It also fixes a situation where too many IOs where reported if a bio was
> re-submitted to the mddev, because io accounting is now performed only
> on newly arriving bios.
>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
> drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
> drivers/md/md.h | 1 +
> 2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f567f536b529..5a9f167ef5b9 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
> }
> EXPORT_SYMBOL(md_handle_request);
>
> +struct md_io {
> + struct mddev *mddev;
> + struct bio *orig_bio;
> + unsigned long start_time;
> + struct bio orig_bio_clone;
> +};
> +
> +static void md_end_request(struct bio *bio)
> +{
> + struct md_io *md_io = bio->bi_private;
> + struct mddev *mddev = md_io->mddev;
> + struct bio *orig_bio = md_io->orig_bio;
> +
> + orig_bio->bi_status = bio->bi_status;
> +
> + generic_end_io_acct(mddev->queue, bio_op(orig_bio),
> + &mddev->gendisk->part0, md_io->start_time);
[...]
> + generic_start_io_acct(mddev->queue, bio_op(bio),
> + bio_sectors(bio), &mddev->gendisk->part0);
> + }
> +
Now, you need to switch to call bio_{start,end}_io_acct instead of
generic_{start,end}_io_acct after the changes from Christoph.
Thanks,
Guoqing
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] md: improve io stats accounting
2020-06-08 14:37 ` Guoqing Jiang
@ 2020-07-02 6:30 ` Song Liu
0 siblings, 0 replies; 11+ messages in thread
From: Song Liu @ 2020-07-02 6:30 UTC (permalink / raw)
To: Guoqing Jiang; +Cc: Artur Paszkiewicz, linux-raid
On Mon, Jun 8, 2020 at 7:37 AM Guoqing Jiang
<guoqing.jiang@cloud.ionos.com> wrote:
>
> On 6/1/20 6:12 PM, Artur Paszkiewicz wrote:
> > Use generic io accounting functions to manage io stats. There was an
> > attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
> > io stats accounting functions to simplify io stat accounting"), but it
> > did not include a call to generic_end_io_acct() and caused issues with
> > tracking in-flight IOs, so it was later removed in commit
> > 74672d069b298b03 ("md: fix md io stats accounting broken").
> >
> > This patch attempts to fix this by using both generic_start_io_acct()
> > and generic_end_io_acct(). To make it possible, in md_make_request() a
> > bio is cloned with additional data - struct md_io, which includes the io
> > start_time. A new bioset is introduced for this purpose. We call
> > generic_start_io_acct() and pass the clone instead of the original to
> > md_handle_request(). When it completes, we call generic_end_io_acct()
> > and complete the original bio.
> >
> > This adds correct statistics about in-flight IOs and IO processing time,
> > interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
> >
> > It also fixes a situation where too many IOs where reported if a bio was
> > re-submitted to the mddev, because io accounting is now performed only
> > on newly arriving bios.
> >
> > Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> > ---
> > drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
> > drivers/md/md.h | 1 +
> > 2 files changed, 53 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index f567f536b529..5a9f167ef5b9 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
> > }
> > EXPORT_SYMBOL(md_handle_request);
> >
> > +struct md_io {
> > + struct mddev *mddev;
> > + struct bio *orig_bio;
> > + unsigned long start_time;
> > + struct bio orig_bio_clone;
> > +};
> > +
> > +static void md_end_request(struct bio *bio)
> > +{
> > + struct md_io *md_io = bio->bi_private;
> > + struct mddev *mddev = md_io->mddev;
> > + struct bio *orig_bio = md_io->orig_bio;
> > +
> > + orig_bio->bi_status = bio->bi_status;
> > +
> > + generic_end_io_acct(mddev->queue, bio_op(orig_bio),
> > + &mddev->gendisk->part0, md_io->start_time);
>
> [...]
>
> > + generic_start_io_acct(mddev->queue, bio_op(bio),
> > + bio_sectors(bio), &mddev->gendisk->part0);
> > + }
> > +
>
> Now, you need to switch to call bio_{start,end}_io_acct instead of
> generic_{start,end}_io_acct after the changes from Christoph.
Thanks Guoqing!
Hi Artur,
Please rebase your change on top of md-next branch:
https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git/log/?h=md-next
Also, please check the .patch file with scripts/checkpatch.pl.
Thanks,
Song
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2020-07-02 6:30 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
2020-06-01 22:03 ` kbuild test robot
2020-06-02 6:22 ` Song Liu
2020-06-02 6:31 ` [kbuild-all] " Rong Chen
2020-06-02 6:48 ` Song Liu
2020-06-02 11:47 ` Artur Paszkiewicz
2020-06-02 17:16 ` Song Liu
2020-06-02 17:32 ` John Stoffel
2020-06-02 7:01 ` kbuild test robot
2020-06-08 14:37 ` Guoqing Jiang
2020-07-02 6:30 ` Song Liu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).