linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] md: improve io stats accounting
@ 2020-06-01 16:12 Artur Paszkiewicz
  2020-06-01 22:03 ` kbuild test robot
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2020-06-01 16:12 UTC (permalink / raw)
  To: song; +Cc: linux-raid, Artur Paszkiewicz

Use generic io accounting functions to manage io stats. There was an
attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
io stats accounting functions to simplify io stat accounting"), but it
did not include a call to generic_end_io_acct() and caused issues with
tracking in-flight IOs, so it was later removed in commit
74672d069b298b03 ("md: fix md io stats accounting broken").

This patch attempts to fix this by using both generic_start_io_acct()
and generic_end_io_acct(). To make it possible, in md_make_request() a
bio is cloned with additional data - struct md_io, which includes the io
start_time. A new bioset is introduced for this purpose. We call
generic_start_io_acct() and pass the clone instead of the original to
md_handle_request(). When it completes, we call generic_end_io_acct()
and complete the original bio.

This adds correct statistics about in-flight IOs and IO processing time,
interpreted e.g. in iostat as await, svctm, aqu-sz and %util.

It also fixes a situation where too many IOs were reported if a bio was
re-submitted to the mddev, because io accounting is now performed only
on newly arriving bios.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
---
 drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
 drivers/md/md.h |  1 +
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f567f536b529..5a9f167ef5b9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
 }
 EXPORT_SYMBOL(md_handle_request);
 
+struct md_io {
+	struct mddev *mddev;
+	struct bio *orig_bio;
+	unsigned long start_time;
+	struct bio orig_bio_clone;
+};
+
+static void md_end_request(struct bio *bio)
+{
+	struct md_io *md_io = bio->bi_private;
+	struct mddev *mddev = md_io->mddev;
+	struct bio *orig_bio = md_io->orig_bio;
+
+	orig_bio->bi_status = bio->bi_status;
+
+	generic_end_io_acct(mddev->queue, bio_op(orig_bio),
+			    &mddev->gendisk->part0, md_io->start_time);
+	bio_put(bio);
+
+	bio_endio(orig_bio);
+}
+
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
-	const int sgrp = op_stat_group(bio_op(bio));
 	struct mddev *mddev = bio->bi_disk->private_data;
-	unsigned int sectors;
 
 	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
 		bio_io_error(bio);
@@ -488,21 +508,30 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	/*
-	 * save the sectors now since our bio can
-	 * go away inside make_request
-	 */
-	sectors = bio_sectors(bio);
+	if (bio->bi_pool != &mddev->md_io_bs) {
+		struct bio *clone;
+		struct md_io *md_io;
+
+		clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
+
+		md_io = container_of(clone, struct md_io, orig_bio_clone);
+		md_io->mddev = mddev;
+		md_io->orig_bio = bio;
+		md_io->start_time = jiffies;
+
+		clone->bi_end_io = md_end_request;
+		clone->bi_private = md_io;
+		bio = clone;
+
+		generic_start_io_acct(mddev->queue, bio_op(bio),
+				      bio_sectors(bio), &mddev->gendisk->part0);
+	}
+
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
 
 	md_handle_request(mddev, bio);
 
-	part_stat_lock();
-	part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
-	part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
-	part_stat_unlock();
-
 	return BLK_QC_T_NONE;
 }
 
@@ -2338,7 +2367,8 @@ int md_integrity_register(struct mddev *mddev)
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
+	    bioset_integrity_create(&mddev->md_io_bs, BIO_POOL_SIZE)) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5545,6 +5575,7 @@ static void md_free(struct kobject *ko)
 
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 	kfree(mddev);
 }
 
@@ -5838,6 +5869,12 @@ int md_run(struct mddev *mddev)
 		if (err)
 			return err;
 	}
+	if (!bioset_initialized(&mddev->md_io_bs)) {
+		err = bioset_init(&mddev->md_io_bs, BIO_POOL_SIZE,
+				  offsetof(struct md_io, orig_bio_clone), 0);
+		if (err)
+			return err;
+	}
 
 	spin_lock(&pers_lock);
 	pers = find_pers(mddev->level, mddev->clevel);
@@ -6015,6 +6052,7 @@ int md_run(struct mddev *mddev)
 abort:
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6239,6 +6277,7 @@ void md_stop(struct mddev *mddev)
 	__md_stop(mddev);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->md_io_bs);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 612814d07d35..74273728b898 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -481,6 +481,7 @@ struct mddev {
 	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
+	struct bio_set			md_io_bs;
 
 	/* Generic flush handling.
 	 * The last to finish preflush schedules a worker to submit
-- 
2.26.0

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
@ 2020-06-01 22:03 ` kbuild test robot
  2020-06-02  6:22   ` Song Liu
  2020-06-02  6:48 ` Song Liu
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: kbuild test robot @ 2020-06-01 22:03 UTC (permalink / raw)
  To: song; +Cc: kbuild-all, linux-raid, Artur Paszkiewicz

[-- Attachment #1: Type: text/plain, Size: 4612 bytes --]

Hi Artur,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20200529]
[cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Artur-Paszkiewicz/md-improve-io-stats-accounting/20200602-002835
base:    e7b08814b16b80a0bf76eeca16317f8c2ed23b8c
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=ia64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

drivers/md/md.c: In function 'md_end_request':
>> drivers/md/md.c:481:2: error: implicit declaration of function 'generic_end_io_acct'; did you mean 'bio_end_io_acct'? [-Werror=implicit-function-declaration]
481 |  generic_end_io_acct(mddev->queue, bio_op(orig_bio),
|  ^~~~~~~~~~~~~~~~~~~
|  bio_end_io_acct
drivers/md/md.c: In function 'md_make_request':
>> drivers/md/md.c:526:3: error: implicit declaration of function 'generic_start_io_acct'; did you mean 'bio_start_io_acct'? [-Werror=implicit-function-declaration]
526 |   generic_start_io_acct(mddev->queue, bio_op(bio),
|   ^~~~~~~~~~~~~~~~~~~~~
|   bio_start_io_acct
drivers/md/md.c: In function 'bind_rdev_to_array':
drivers/md/md.c:2475:27: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
2475 |   /* failure here is OK */;
|                           ^
drivers/md/md.c: In function 'slot_store':
drivers/md/md.c:3236:28: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
3236 |    /* failure here is OK */;
|                            ^
drivers/md/md.c: In function 'remove_and_add_spares':
drivers/md/md.c:9103:29: warning: suggest braces around empty body in an 'if' statement [-Wempty-body]
9103 |     /* failure here is OK */;
|                             ^
cc1: some warnings being treated as errors

vim +481 drivers/md/md.c

   472	
   473	static void md_end_request(struct bio *bio)
   474	{
   475		struct md_io *md_io = bio->bi_private;
   476		struct mddev *mddev = md_io->mddev;
   477		struct bio *orig_bio = md_io->orig_bio;
   478	
   479		orig_bio->bi_status = bio->bi_status;
   480	
 > 481		generic_end_io_acct(mddev->queue, bio_op(orig_bio),
   482				    &mddev->gendisk->part0, md_io->start_time);
   483		bio_put(bio);
   484	
   485		bio_endio(orig_bio);
   486	}
   487	
   488	static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
   489	{
   490		const int rw = bio_data_dir(bio);
   491		struct mddev *mddev = bio->bi_disk->private_data;
   492	
   493		if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
   494			bio_io_error(bio);
   495			return BLK_QC_T_NONE;
   496		}
   497	
   498		blk_queue_split(q, &bio);
   499	
   500		if (mddev == NULL || mddev->pers == NULL) {
   501			bio_io_error(bio);
   502			return BLK_QC_T_NONE;
   503		}
   504		if (mddev->ro == 1 && unlikely(rw == WRITE)) {
   505			if (bio_sectors(bio) != 0)
   506				bio->bi_status = BLK_STS_IOERR;
   507			bio_endio(bio);
   508			return BLK_QC_T_NONE;
   509		}
   510	
   511		if (bio->bi_pool != &mddev->md_io_bs) {
   512			struct bio *clone;
   513			struct md_io *md_io;
   514	
   515			clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
   516	
   517			md_io = container_of(clone, struct md_io, orig_bio_clone);
   518			md_io->mddev = mddev;
   519			md_io->orig_bio = bio;
   520			md_io->start_time = jiffies;
   521	
   522			clone->bi_end_io = md_end_request;
   523			clone->bi_private = md_io;
   524			bio = clone;
   525	
 > 526			generic_start_io_acct(mddev->queue, bio_op(bio),
   527					      bio_sectors(bio), &mddev->gendisk->part0);
   528		}
   529	
   530		/* bio could be mergeable after passing to underlayer */
   531		bio->bi_opf &= ~REQ_NOMERGE;
   532	
   533		md_handle_request(mddev, bio);
   534	
   535		return BLK_QC_T_NONE;
   536	}
   537	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 60472 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-01 22:03 ` kbuild test robot
@ 2020-06-02  6:22   ` Song Liu
  2020-06-02  6:31     ` [kbuild-all] " Rong Chen
  0 siblings, 1 reply; 11+ messages in thread
From: Song Liu @ 2020-06-02  6:22 UTC (permalink / raw)
  To: kbuild test robot; +Cc: Artur Paszkiewicz, kbuild-all, linux-raid

Hi kbuild test robot,

On Mon, Jun 1, 2020 at 3:03 PM kbuild test robot <lkp@intel.com> wrote:
>
> Hi Artur,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on next-20200529]
> [cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
> [if your patch is applied to the wrong git tree, please drop us a note to help
> improve the system. BTW, we also suggest to use '--base' option to specify the

I am able to apply this to

https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git md-next

Please use that branch for testing.

Thanks,
Song

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [kbuild-all] Re: [PATCH] md: improve io stats accounting
  2020-06-02  6:22   ` Song Liu
@ 2020-06-02  6:31     ` Rong Chen
  0 siblings, 0 replies; 11+ messages in thread
From: Rong Chen @ 2020-06-02  6:31 UTC (permalink / raw)
  To: Song Liu, kbuild test robot; +Cc: Artur Paszkiewicz, kbuild-all, linux-raid



On 6/2/20 2:22 PM, Song Liu wrote:
> Hi kbuild test robot,
>
> On Mon, Jun 1, 2020 at 3:03 PM kbuild test robot <lkp@intel.com> wrote:
>> Hi Artur,
>>
>> I love your patch! Yet something to improve:
>>
>> [auto build test ERROR on next-20200529]
>> [cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
>> [if your patch is applied to the wrong git tree, please drop us a note to help
>> improve the system. BTW, we also suggest to use '--base' option to specify the
> I am able to apply this to
>
> https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git md-next
>
> Please use that branch for testing.
>
> Thanks,
> Song

Hi Song,

Thanks for your advice, we'll try.

Best Regards,
Rong Chen

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
  2020-06-01 22:03 ` kbuild test robot
@ 2020-06-02  6:48 ` Song Liu
  2020-06-02 11:47   ` Artur Paszkiewicz
  2020-06-02  7:01 ` kbuild test robot
  2020-06-08 14:37 ` Guoqing Jiang
  3 siblings, 1 reply; 11+ messages in thread
From: Song Liu @ 2020-06-02  6:48 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid

Hi Artur,

Thanks for the patch.

On Mon, Jun 1, 2020 at 9:13 AM Artur Paszkiewicz
<artur.paszkiewicz@intel.com> wrote:
>
> Use generic io accounting functions to manage io stats. There was an
[...]

> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
>  drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
>  drivers/md/md.h |  1 +
>  2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f567f536b529..5a9f167ef5b9 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
>  }
>  EXPORT_SYMBOL(md_handle_request);
>
>

[...]

>
> -       /*
> -        * save the sectors now since our bio can
> -        * go away inside make_request
> -        */
> -       sectors = bio_sectors(bio);
> +       if (bio->bi_pool != &mddev->md_io_bs) {
> +               struct bio *clone;
> +               struct md_io *md_io;
> +
> +               clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);

Handle clone == NULL?

Also, have you done benchmarks with this change?

Song

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
  2020-06-01 22:03 ` kbuild test robot
  2020-06-02  6:48 ` Song Liu
@ 2020-06-02  7:01 ` kbuild test robot
  2020-06-08 14:37 ` Guoqing Jiang
  3 siblings, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2020-06-02  7:01 UTC (permalink / raw)
  To: song; +Cc: kbuild-all, clang-built-linux, linux-raid, Artur Paszkiewicz

[-- Attachment #1: Type: text/plain, Size: 4308 bytes --]

Hi Artur,

I love your patch! Yet something to improve:

[auto build test ERROR on next-20200529]
[cannot apply to linus/master md/for-next v5.7 v5.7-rc7 v5.7-rc6 v5.7]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Artur-Paszkiewicz/md-improve-io-stats-accounting/20200602-002835
base:    e7b08814b16b80a0bf76eeca16317f8c2ed23b8c
config: powerpc-randconfig-r005-20200602 (attached as .config)
compiler: clang version 11.0.0 (https://github.com/llvm/llvm-project 2388a096e7865c043e83ece4e26654bd3d1a20d5)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc cross compiling tool for clang build
        # apt-get install binutils-powerpc-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> drivers/md/md.c:481:2: error: implicit declaration of function 'generic_end_io_acct' [-Werror,-Wimplicit-function-declaration]
generic_end_io_acct(mddev->queue, bio_op(orig_bio),
^
drivers/md/md.c:481:2: note: did you mean 'bio_end_io_acct'?
include/linux/blkdev.h:1917:20: note: 'bio_end_io_acct' declared here
static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time)
^
>> drivers/md/md.c:526:3: error: implicit declaration of function 'generic_start_io_acct' [-Werror,-Wimplicit-function-declaration]
generic_start_io_acct(mddev->queue, bio_op(bio),
^
drivers/md/md.c:526:3: note: did you mean 'bio_start_io_acct'?
include/linux/blkdev.h:1907:29: note: 'bio_start_io_acct' declared here
static inline unsigned long bio_start_io_acct(struct bio *bio)
^
2 errors generated.

vim +/generic_end_io_acct +481 drivers/md/md.c

   472	
   473	static void md_end_request(struct bio *bio)
   474	{
   475		struct md_io *md_io = bio->bi_private;
   476		struct mddev *mddev = md_io->mddev;
   477		struct bio *orig_bio = md_io->orig_bio;
   478	
   479		orig_bio->bi_status = bio->bi_status;
   480	
 > 481		generic_end_io_acct(mddev->queue, bio_op(orig_bio),
   482				    &mddev->gendisk->part0, md_io->start_time);
   483		bio_put(bio);
   484	
   485		bio_endio(orig_bio);
   486	}
   487	
   488	static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
   489	{
   490		const int rw = bio_data_dir(bio);
   491		struct mddev *mddev = bio->bi_disk->private_data;
   492	
   493		if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
   494			bio_io_error(bio);
   495			return BLK_QC_T_NONE;
   496		}
   497	
   498		blk_queue_split(q, &bio);
   499	
   500		if (mddev == NULL || mddev->pers == NULL) {
   501			bio_io_error(bio);
   502			return BLK_QC_T_NONE;
   503		}
   504		if (mddev->ro == 1 && unlikely(rw == WRITE)) {
   505			if (bio_sectors(bio) != 0)
   506				bio->bi_status = BLK_STS_IOERR;
   507			bio_endio(bio);
   508			return BLK_QC_T_NONE;
   509		}
   510	
   511		if (bio->bi_pool != &mddev->md_io_bs) {
   512			struct bio *clone;
   513			struct md_io *md_io;
   514	
   515			clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
   516	
   517			md_io = container_of(clone, struct md_io, orig_bio_clone);
   518			md_io->mddev = mddev;
   519			md_io->orig_bio = bio;
   520			md_io->start_time = jiffies;
   521	
   522			clone->bi_end_io = md_end_request;
   523			clone->bi_private = md_io;
   524			bio = clone;
   525	
 > 526			generic_start_io_acct(mddev->queue, bio_op(bio),
   527					      bio_sectors(bio), &mddev->gendisk->part0);
   528		}
   529	
   530		/* bio could be mergeable after passing to underlayer */
   531		bio->bi_opf &= ~REQ_NOMERGE;
   532	
   533		md_handle_request(mddev, bio);
   534	
   535		return BLK_QC_T_NONE;
   536	}
   537	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29684 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-02  6:48 ` Song Liu
@ 2020-06-02 11:47   ` Artur Paszkiewicz
  2020-06-02 17:16     ` Song Liu
  2020-06-02 17:32     ` John Stoffel
  0 siblings, 2 replies; 11+ messages in thread
From: Artur Paszkiewicz @ 2020-06-02 11:47 UTC (permalink / raw)
  To: Song Liu; +Cc: linux-raid

On 6/2/20 8:48 AM, Song Liu wrote:
>> +               clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
> 
> Handle clone == NULL?

I think this should never fail - bio_alloc_bioset() guarantees that. It
is used in a similar manner in raid1 and raid10. How about
BUG_ON(clone == NULL)?

> Also, have you done benchmarks with this change?

I tested 4k random reads on a raid0 (4x P4510 2TB) and it was 2550k vs
2567k IOPS, that's slower only by about 0.66%:

without patch:

# fio --direct=1 --thread --rw=randread --ioengine=libaio --iodepth=64 --bs=4k --name=fio --filename=/dev/md0 --time_based --runtime=300 --numjobs=16 --group_reporting --norandommap --randrepeat=0
fio: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=64
...
fio-3.20
Starting 16 threads
Jobs: 16 (f=16): [r(16)][100.0%][r=9.81GiB/s][r=2571k IOPS][eta 00m:00s]
fio: (groupid=0, jobs=16): err= 0: pid=8678: Tue Jun  2 13:19:38 2020
  read: IOPS=2567k, BW=9.79GiB/s (10.5GB/s)(2938GiB/300002msec)
    slat (nsec): min=1384, max=798130, avg=2852.52, stdev=1108.15
    clat (usec): min=48, max=12387, avg=395.81, stdev=260.52
     lat (usec): min=51, max=12389, avg=398.70, stdev=260.51
    clat percentiles (usec):
     |  1.00th=[  101],  5.00th=[  135], 10.00th=[  157], 20.00th=[  196],
     | 30.00th=[  233], 40.00th=[  273], 50.00th=[  322], 60.00th=[  379],
     | 70.00th=[  457], 80.00th=[  562], 90.00th=[  734], 95.00th=[  889],
     | 99.00th=[ 1287], 99.50th=[ 1500], 99.90th=[ 2147], 99.95th=[ 2442],
     | 99.99th=[ 3130]
   bw (  MiB/s): min= 9664, max=10215, per=100.00%, avg=10033.54, stdev= 4.59, samples=9584
   iops        : min=2474063, max=2615178, avg=2568585.96, stdev=1176.06, samples=9584
  lat (usec)   : 50=0.01%, 100=0.94%, 250=33.51%, 500=39.98%, 750=16.36%
  lat (usec)   : 1000=6.16%
  lat (msec)   : 2=2.92%, 4=0.14%, 10=0.01%, 20=0.01%
  cpu          : usr=16.07%, sys=41.29%, ctx=193003298, majf=0, minf=90976
  IO depths    : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.1%, >=64=100.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, >=64=0.0%
     issued rwts: total=770061876,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=64

Run status group 0 (all jobs):
   READ: bw=9.79GiB/s (10.5GB/s), 9.79GiB/s-9.79GiB/s (10.5GB/s-10.5GB/s), io=2938GiB (3154GB), run=300002-300002msec

Disk stats (read/write):
    md0: ios=769770571/0, merge=0/0, ticks=0/0, in_queue=0, util=0.00%, aggrios=192515469/0, aggrmerge=0/0, aggrticks=74947558/0, aggrin_queue=6006362, aggrutil=100.00%
  nvme3n1: ios=192511957/0, merge=0/0, ticks=76700358/0, in_queue=6215887, util=100.00%
  nvme6n1: ios=192503722/0, merge=0/0, ticks=72629807/0, in_queue=5520156, util=100.00%
  nvme2n1: ios=192518930/0, merge=0/0, ticks=74719743/0, in_queue=5979779, util=100.00%
  nvme1n1: ios=192527267/0, merge=0/0, ticks=75740325/0, in_queue=6309628, util=100.00%

with patch:

# fio --direct=1 --thread --rw=randread --ioengine=libaio --iodepth=64 --bs=4k --name=fio --filename=/dev/md0 --time_based --runtime=300 --numjobs=16 --group_reporting --norandommap --randrepeat=0
fio: (g=0): rw=randread, bs=(R) 4096B-4096B, (W) 4096B-4096B, (T) 4096B-4096B, ioengine=libaio, iodepth=64
...
fio-3.20
Starting 16 threads
Jobs: 16 (f=16): [r(16)][100.0%][r=9934MiB/s][r=2543k IOPS][eta 00m:00s]
fio: (groupid=0, jobs=16): err= 0: pid=8463: Tue Jun  2 13:32:12 2020
  read: IOPS=2550k, BW=9961MiB/s (10.4GB/s)(2918GiB/300002msec)
    slat (nsec): min=1512, max=3578.1k, avg=5145.36, stdev=2145.71
    clat (usec): min=50, max=12421, avg=396.13, stdev=210.38
     lat (usec): min=52, max=12428, avg=401.33, stdev=210.45
    clat percentiles (usec):
     |  1.00th=[  133],  5.00th=[  178], 10.00th=[  208], 20.00th=[  247],
     | 30.00th=[  281], 40.00th=[  314], 50.00th=[  347], 60.00th=[  383],
     | 70.00th=[  437], 80.00th=[  510], 90.00th=[  644], 95.00th=[  783],
     | 99.00th=[ 1156], 99.50th=[ 1369], 99.90th=[ 1991], 99.95th=[ 2311],
     | 99.99th=[ 2999]
   bw (  MiB/s): min= 9266, max=10648, per=100.00%, avg=9967.23, stdev=13.31, samples=9584
   iops        : min=2372118, max=2725915, avg=2551610.09, stdev=3407.18, samples=9584
  lat (usec)   : 100=0.13%, 250=20.62%, 500=58.25%, 750=15.25%, 1000=3.92%
  lat (msec)   : 2=1.72%, 4=0.10%, 10=0.01%, 20=0.01%
  cpu          : usr=15.97%, sys=66.59%, ctx=11235674, majf=0, minf=41238
  IO depths    : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=0.1%, >=64=100.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.1%, >=64=0.0%
     issued rwts: total=764997277,0,0,0 short=0,0,0,0 dropped=0,0,0,0
     latency   : target=0, window=0, percentile=100.00%, depth=64

Run status group 0 (all jobs):
   READ: bw=9961MiB/s (10.4GB/s), 9961MiB/s-9961MiB/s (10.4GB/s-10.4GB/s), io=2918GiB (3133GB), run=300002-300002msec

Disk stats (read/write):
    md0: ios=764702549/0, merge=0/0, ticks=242091778/0, in_queue=242091754, util=100.00%, aggrios=191249319/0, aggrmerge=0/0, aggrticks=59760064/0, aggrin_queue=2798855, aggrutil=100.00%
  nvme3n1: ios=191250967/0, merge=0/0, ticks=61633420/0, in_queue=3032943, util=100.00%
  nvme6n1: ios=191257919/0, merge=0/0, ticks=59065688/0, in_queue=2784603, util=100.00%
  nvme2n1: ios=191255129/0, merge=0/0, ticks=58520284/0, in_queue=2461116, util=100.00%
  nvme1n1: ios=191233262/0, merge=0/0, ticks=59820864/0, in_queue=2916760, util=100.00%

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-02 11:47   ` Artur Paszkiewicz
@ 2020-06-02 17:16     ` Song Liu
  2020-06-02 17:32     ` John Stoffel
  1 sibling, 0 replies; 11+ messages in thread
From: Song Liu @ 2020-06-02 17:16 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: linux-raid

On Tue, Jun 2, 2020 at 4:47 AM Artur Paszkiewicz
<artur.paszkiewicz@intel.com> wrote:
>
> On 6/2/20 8:48 AM, Song Liu wrote:
> >> +               clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
> >
> > Handle clone == NULL?
>
> I think this should never fail - bio_alloc_bioset() guarantees that. It
> is used in a similar manner in raid1 and raid10. How about
> BUG_ON(clone == NULL)?

I misread the code. Current version is fine.

>
> > Also, have you done benchmarks with this change?
>
> I tested 4k random reads on a raid0 (4x P4510 2TB) and it was 2550k vs
> 2567k IOPS, that's slower only by about 0.66%:

Thanks for the test. I will do some more tests and process the patch
after the merge
window.

Song

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-02 11:47   ` Artur Paszkiewicz
  2020-06-02 17:16     ` Song Liu
@ 2020-06-02 17:32     ` John Stoffel
  1 sibling, 0 replies; 11+ messages in thread
From: John Stoffel @ 2020-06-02 17:32 UTC (permalink / raw)
  To: Artur Paszkiewicz; +Cc: Song Liu, linux-raid

>>>>> "Artur" == Artur Paszkiewicz <artur.paszkiewicz@intel.com> writes:

Artur> On 6/2/20 8:48 AM, Song Liu wrote:
>>> +               clone = bio_clone_fast(bio, GFP_NOIO, &mddev->md_io_bs);
>> 
>> Handle clone == NULL?

Artur> I think this should never fail - bio_alloc_bioset() guarantees that. It
Artur> is used in a similar manner in raid1 and raid10. How about
Artur> BUG_ON(clone == NULL)?

No, use WARN_ON() instead, why would you bug the entire system for
just one logical device throwing an error?  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
                   ` (2 preceding siblings ...)
  2020-06-02  7:01 ` kbuild test robot
@ 2020-06-08 14:37 ` Guoqing Jiang
  2020-07-02  6:30   ` Song Liu
  3 siblings, 1 reply; 11+ messages in thread
From: Guoqing Jiang @ 2020-06-08 14:37 UTC (permalink / raw)
  To: Artur Paszkiewicz, song; +Cc: linux-raid

On 6/1/20 6:12 PM, Artur Paszkiewicz wrote:
> Use generic io accounting functions to manage io stats. There was an
> attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
> io stats accounting functions to simplify io stat accounting"), but it
> did not include a call to generic_end_io_acct() and caused issues with
> tracking in-flight IOs, so it was later removed in commit
> 74672d069b298b03 ("md: fix md io stats accounting broken").
>
> This patch attempts to fix this by using both generic_start_io_acct()
> and generic_end_io_acct(). To make it possible, in md_make_request() a
> bio is cloned with additional data - struct md_io, which includes the io
> start_time. A new bioset is introduced for this purpose. We call
> generic_start_io_acct() and pass the clone instead of the original to
> md_handle_request(). When it completes, we call generic_end_io_acct()
> and complete the original bio.
>
> This adds correct statistics about in-flight IOs and IO processing time,
> interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
>
> It also fixes a situation where too many IOs were reported if a bio was
> re-submitted to the mddev, because io accounting is now performed only
> on newly arriving bios.
>
> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> ---
>   drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
>   drivers/md/md.h |  1 +
>   2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f567f536b529..5a9f167ef5b9 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
>   }
>   EXPORT_SYMBOL(md_handle_request);
>   
> +struct md_io {
> +	struct mddev *mddev;
> +	struct bio *orig_bio;
> +	unsigned long start_time;
> +	struct bio orig_bio_clone;
> +};
> +
> +static void md_end_request(struct bio *bio)
> +{
> +	struct md_io *md_io = bio->bi_private;
> +	struct mddev *mddev = md_io->mddev;
> +	struct bio *orig_bio = md_io->orig_bio;
> +
> +	orig_bio->bi_status = bio->bi_status;
> +
> +	generic_end_io_acct(mddev->queue, bio_op(orig_bio),
> +			    &mddev->gendisk->part0, md_io->start_time);

[...]

> +		generic_start_io_acct(mddev->queue, bio_op(bio),
> +				      bio_sectors(bio), &mddev->gendisk->part0);
> +	}
> +

Now, you need to switch to call bio_{start,end}_io_acct instead of
generic_{start,end}_io_acct after the changes from Christoph.

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] md: improve io stats accounting
  2020-06-08 14:37 ` Guoqing Jiang
@ 2020-07-02  6:30   ` Song Liu
  0 siblings, 0 replies; 11+ messages in thread
From: Song Liu @ 2020-07-02  6:30 UTC (permalink / raw)
  To: Guoqing Jiang; +Cc: Artur Paszkiewicz, linux-raid

On Mon, Jun 8, 2020 at 7:37 AM Guoqing Jiang
<guoqing.jiang@cloud.ionos.com> wrote:
>
> On 6/1/20 6:12 PM, Artur Paszkiewicz wrote:
> > Use generic io accounting functions to manage io stats. There was an
> > attempt to do this earlier in commit 18c0b223cf990172 ("md: use generic
> > io stats accounting functions to simplify io stat accounting"), but it
> > did not include a call to generic_end_io_acct() and caused issues with
> > tracking in-flight IOs, so it was later removed in commit
> > 74672d069b298b03 ("md: fix md io stats accounting broken").
> >
> > This patch attempts to fix this by using both generic_start_io_acct()
> > and generic_end_io_acct(). To make it possible, in md_make_request() a
> > bio is cloned with additional data - struct md_io, which includes the io
> > start_time. A new bioset is introduced for this purpose. We call
> > generic_start_io_acct() and pass the clone instead of the original to
> > md_handle_request(). When it completes, we call generic_end_io_acct()
> > and complete the original bio.
> >
> > This adds correct statistics about in-flight IOs and IO processing time,
> > interpreted e.g. in iostat as await, svctm, aqu-sz and %util.
> >
> > It also fixes a situation where too many IOs were reported if a bio was
> > re-submitted to the mddev, because io accounting is now performed only
> > on newly arriving bios.
> >
> > Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
> > ---
> >   drivers/md/md.c | 65 +++++++++++++++++++++++++++++++++++++++----------
> >   drivers/md/md.h |  1 +
> >   2 files changed, 53 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index f567f536b529..5a9f167ef5b9 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -463,12 +463,32 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
> >   }
> >   EXPORT_SYMBOL(md_handle_request);
> >
> > +struct md_io {
> > +     struct mddev *mddev;
> > +     struct bio *orig_bio;
> > +     unsigned long start_time;
> > +     struct bio orig_bio_clone;
> > +};
> > +
> > +static void md_end_request(struct bio *bio)
> > +{
> > +     struct md_io *md_io = bio->bi_private;
> > +     struct mddev *mddev = md_io->mddev;
> > +     struct bio *orig_bio = md_io->orig_bio;
> > +
> > +     orig_bio->bi_status = bio->bi_status;
> > +
> > +     generic_end_io_acct(mddev->queue, bio_op(orig_bio),
> > +                         &mddev->gendisk->part0, md_io->start_time);
>
> [...]
>
> > +             generic_start_io_acct(mddev->queue, bio_op(bio),
> > +                                   bio_sectors(bio), &mddev->gendisk->part0);
> > +     }
> > +
>
> Now, you need to switch to call bio_{start,end}_io_acct instead of
> generic_{start,end}_io_acct after the changes from Christoph.

Thanks Guoqing!

Hi Artur,

Please rebase your change on top of md-next branch:

https://git.kernel.org/pub/scm/linux/kernel/git/song/md.git/log/?h=md-next

Also, please check the .patch file with scripts/checkpatch.pl.

Thanks,
Song

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-07-02  6:30 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-01 16:12 [PATCH] md: improve io stats accounting Artur Paszkiewicz
2020-06-01 22:03 ` kbuild test robot
2020-06-02  6:22   ` Song Liu
2020-06-02  6:31     ` [kbuild-all] " Rong Chen
2020-06-02  6:48 ` Song Liu
2020-06-02 11:47   ` Artur Paszkiewicz
2020-06-02 17:16     ` Song Liu
2020-06-02 17:32     ` John Stoffel
2020-06-02  7:01 ` kbuild test robot
2020-06-08 14:37 ` Guoqing Jiang
2020-07-02  6:30   ` Song Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).