linux-block.vger.kernel.org archive mirror
* [RESEND PATCH v5 1/2] bio: limit bio max size
       [not found] <CGME20210316080104epcas1p11483035c927c2e65600ae90309334b24@epcas1p1.samsung.com>
@ 2021-03-16  7:44 ` Changheun Lee
       [not found]   ` <CGME20210316080106epcas1p3522dda95e9c97fc39b40b008bbf87c04@epcas1p3.samsung.com>
       [not found]   ` <CGME20210406014905epcas1p16830a46b7ac6af95a0e2c2c6f4c04859@epcas1p1.samsung.com>
  0 siblings, 2 replies; 20+ messages in thread
From: Changheun Lee @ 2021-03-16  7:44 UTC (permalink / raw)
  To: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, gregkh,
	hch, linux-block, linux-kernel, ming.lei, osandov, patchwork-bot,
	tj, tom.leiming
  Cc: jisoo2146.oh, junho89.kim, mj0123.lee, seunghwan.hyun,
	sookwan7.kim, woosung2.lee, yt0928.kim, Changheun Lee

bio size can grow up to 4GB when multi-page bvec is enabled, but this
sometimes leads to inefficient behavior. In the case of large-chunk
direct I/O - a 32MB chunk read in user space - all pages for the 32MB
are merged into a single bio structure if the pages' physical addresses
are contiguous. This delays submission until the merge completes, so
the bio max size should be limited to a proper size.

When a 32MB chunk read with the direct I/O option comes from userspace,
the current kernel behavior in the do_direct_IO() loop is shown in the
timeline below.

 | bio merge for 32MB. total 8,192 pages are merged.
 | total elapsed time is over 2ms.
 |------------------ ... ----------------------->|
                                                 | 8,192 pages merged a bio.
                                                 | at this time, first bio submit is done.
                                                 | 1 bio is split to 32 read request and issue.
                                                 |--------------->
                                                  |--------------->
                                                   |--------------->
                                                              ......
                                                                   |--------------->
                                                                    |--------------->|
                          total 19ms elapsed to complete 32MB read done from device. |

If the bio max size is limited to 1MB, the behavior changes as below.

 | bio merge for 1MB. 256 pages are merged for each bio.
 | in total, 32 bios will be made.
 | total elapsed time is over 2ms. it's the same.
 | but the first bio submit happens after only about 100us.
 |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
      | 256 pages merged a bio.
      | at this time, first bio submit is done.
      | and 1 read request is issued for 1 bio.
      |--------------->
           |--------------->
                |--------------->
                                      ......
                                                 |--------------->
                                                  |--------------->|
        total 17ms elapsed to complete 32MB read done from device. |

As a result, read requests are issued earlier if the bio max size is
limited. With the current multi-page bvec behavior, a very large bio
can be created, and that delays issuing the first I/O request.

Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
---
 block/bio.c            | 13 ++++++++++++-
 include/linux/bio.h    |  2 +-
 include/linux/blkdev.h |  3 +++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 1f2cc1fbe283..c528e1f944c7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 }
 EXPORT_SYMBOL(bio_init);
 
+unsigned int bio_max_size(struct bio *bio)
+{
+	struct request_queue *q = bio->bi_disk->queue;
+
+	if (blk_queue_limit_bio_size(q))
+		return blk_queue_get_max_sectors(q, bio_op(bio))
+			<< SECTOR_SHIFT;
+
+	return UINT_MAX;
+}
+
 /**
  * bio_reset - reinitialize a bio
  * @bio:	bio to reset
@@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
 		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > UINT_MAX - len) {
+			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
 				*same_page = false;
 				return false;
 			}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1edda614f7ce..13b6f6562a5b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return true;
 
-	if (bio->bi_iter.bi_size > UINT_MAX - len)
+	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
 		return true;
 
 	return false;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f94ee3089e01..3aeab9e7e97b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -621,6 +621,7 @@ struct request_queue {
 #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
 #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
+#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP) |		\
@@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_limit_bio_size(q)	\
+	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
2.28.0


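For readers skimming the thread: patch 1/2 above only tests
QUEUE_FLAG_LIMIT_BIO_SIZE; how the flag actually gets set is discussed
further down (a sysfs toggle in patch 2/2, and Greg's question about it).
As a rough illustration only - not part of the posted series - a driver
could also opt in directly with the existing blk_queue_flag_set() helper;
the function name example_setup_queue() below is made up for this sketch.

	#include <linux/blkdev.h>

	/*
	 * Illustrative sketch, not from the posted patches: opt a request
	 * queue in to the bio size cap added by patch 1/2.
	 * blk_queue_flag_set() is the existing helper for setting
	 * request_queue flags; once the flag is set, bio_max_size()
	 * returns blk_queue_get_max_sectors() << SECTOR_SHIFT for bios on
	 * this queue instead of UINT_MAX.
	 */
	static void example_setup_queue(struct request_queue *q)
	{
		blk_queue_flag_set(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
	}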

* [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs
       [not found]   ` <CGME20210316080106epcas1p3522dda95e9c97fc39b40b008bbf87c04@epcas1p3.samsung.com>
@ 2021-03-16  7:44     ` Changheun Lee
  2021-04-06 21:05       ` Bart Van Assche
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-03-16  7:44 UTC (permalink / raw)
  To: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, gregkh,
	hch, linux-block, linux-kernel, ming.lei, osandov, patchwork-bot,
	tj, tom.leiming
  Cc: jisoo2146.oh, junho89.kim, mj0123.lee, seunghwan.hyun,
	sookwan7.kim, woosung2.lee, yt0928.kim, Changheun Lee

Add a limit_bio_size block sysfs node to limit the bio size. The queue
flag QUEUE_FLAG_LIMIT_BIO_SIZE is set when limit_bio_size is set, and
the bio max size is then limited to the queue max sectors via that
flag.

Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
---
 Documentation/ABI/testing/sysfs-block | 10 ++++++++++
 Documentation/block/queue-sysfs.rst   |  7 +++++++
 block/blk-sysfs.c                     |  3 +++
 3 files changed, 20 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index e34cdeeeb9d4..86a7b15410cf 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -316,3 +316,13 @@ Description:
 		does not complete in this time then the block driver timeout
 		handler is invoked. That timeout handler can decide to retry
 		the request, to fail it or to start a device recovery strategy.
+
+What:		/sys/block/<disk>/queue/limit_bio_size
+Date:		Feb, 2021
+Contact:	Changheun Lee <nanich.lee@samsung.com>
+Description:
+		(RW) Toggle to set or clear the QUEUE_FLAG_LIMIT_BIO_SIZE queue
+		flag. When limit_bio_size is set, QUEUE_FLAG_LIMIT_BIO_SIZE is
+		set and the bio max size is limited to the queue max sectors.
+		When limit_bio_size is cleared, QUEUE_FLAG_LIMIT_BIO_SIZE is
+		cleared and the bio max size limit is removed.
diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
index 2638d3446b79..cd371a821855 100644
--- a/Documentation/block/queue-sysfs.rst
+++ b/Documentation/block/queue-sysfs.rst
@@ -273,4 +273,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
 do not support zone commands, they will be treated as regular block devices
 and zoned will report "none".
 
+limit_bio_size (RW)
+-------------------
+This reports the QUEUE_FLAG_LIMIT_BIO_SIZE queue flag value. The flag can
+be changed by writing 1 (set) or 0 (clear) to this node. When set, the bio
+max size is limited to the queue max sectors; when cleared, the bio max
+size limit is removed.
+
 Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b513f1683af0..840d97f427e6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -288,6 +288,7 @@ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
 QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
 QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
 QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
+QUEUE_SYSFS_BIT_FNS(limit_bio_size, LIMIT_BIO_SIZE, 0);
 #undef QUEUE_SYSFS_BIT_FNS
 
 static ssize_t queue_zoned_show(struct request_queue *q, char *page)
@@ -615,6 +616,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
 QUEUE_RW_ENTRY(queue_iostats, "iostats");
 QUEUE_RW_ENTRY(queue_random, "add_random");
 QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+QUEUE_RW_ENTRY(queue_limit_bio_size, "limit_bio_size");
 
 static struct attribute *queue_attrs[] = {
 	&queue_requests_entry.attr,
@@ -648,6 +650,7 @@ static struct attribute *queue_attrs[] = {
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
 	&queue_stable_writes_entry.attr,
+	&queue_limit_bio_size_entry.attr,
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
-- 
2.28.0



* [RESEND PATCH v5 1/2] bio: limit bio max size
       [not found]   ` <CGME20210406014905epcas1p16830a46b7ac6af95a0e2c2c6f4c04859@epcas1p1.samsung.com>
@ 2021-04-06  1:31     ` Changheun Lee
  2021-04-06  7:32       ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-06  1:31 UTC (permalink / raw)
  To: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, gregkh,
	hch, linux-block, linux-kernel, ming.lei, osandov, patchwork-bot,
	tj, tom.leiming
  Cc: jisoo2146.oh, junho89.kim, mj0123.lee, seunghwan.hyun,
	sookwan7.kim, woosung2.lee, yt0928.kim

> bio size can grow up to 4GB when muli-page bvec is enabled.
> but sometimes it would lead to inefficient behaviors.
> in case of large chunk direct I/O, - 32MB chunk read in user space -
> all pages for 32MB would be merged to a bio structure if the pages
> physical addresses are contiguous. it makes some delay to submit
> until merge complete. bio max size should be limited to a proper size.
> 
> When 32MB chunk read with direct I/O option is coming from userspace,
> kernel behavior is below now in do_direct_IO() loop. it's timeline.
> 
>  | bio merge for 32MB. total 8,192 pages are merged.
>  | total elapsed time is over 2ms.
>  |------------------ ... ----------------------->|
>                                                  | 8,192 pages merged a bio.
>                                                  | at this time, first bio submit is done.
>                                                  | 1 bio is split to 32 read request and issue.
>                                                  |--------------->
>                                                   |--------------->
>                                                    |--------------->
>                                                               ......
>                                                                    |--------------->
>                                                                     |--------------->|
>                           total 19ms elapsed to complete 32MB read done from device. |
> 
> If bio max size is limited with 1MB, behavior is changed below.
> 
>  | bio merge for 1MB. 256 pages are merged for each bio.
>  | total 32 bio will be made.
>  | total elapsed time is over 2ms. it's same.
>  | but, first bio submit timing is fast. about 100us.
>  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
>       | 256 pages merged a bio.
>       | at this time, first bio submit is done.
>       | and 1 read request is issued for 1 bio.
>       |--------------->
>            |--------------->
>                 |--------------->
>                                       ......
>                                                  |--------------->
>                                                   |--------------->|
>         total 17ms elapsed to complete 32MB read done from device. |
> 
> As a result, read request issue timing is faster if bio max size is limited.
> Current kernel behavior with multipage bvec, super large bio can be created.
> And it lead to delay first I/O request issue.
> 
> Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> ---
>  block/bio.c            | 13 ++++++++++++-
>  include/linux/bio.h    |  2 +-
>  include/linux/blkdev.h |  3 +++
>  3 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 1f2cc1fbe283..c528e1f944c7 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
>  }
>  EXPORT_SYMBOL(bio_init);
>  
> +unsigned int bio_max_size(struct bio *bio)
> +{
> +	struct request_queue *q = bio->bi_disk->queue;
> +
> +	if (blk_queue_limit_bio_size(q))
> +		return blk_queue_get_max_sectors(q, bio_op(bio))
> +			<< SECTOR_SHIFT;
> +
> +	return UINT_MAX;
> +}
> +
>  /**
>   * bio_reset - reinitialize a bio
>   * @bio:	bio to reset
> @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
>  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
>  
>  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
>  				*same_page = false;
>  				return false;
>  			}
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 1edda614f7ce..13b6f6562a5b 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
>  	if (bio->bi_vcnt >= bio->bi_max_vecs)
>  		return true;
>  
> -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
>  		return true;
>  
>  	return false;
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index f94ee3089e01..3aeab9e7e97b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -621,6 +621,7 @@ struct request_queue {
>  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
>  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
>  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
>  
>  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
>  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
>  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
>  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> +#define blk_queue_limit_bio_size(q)	\
> +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
>  
>  extern void blk_set_pm_only(struct request_queue *q);
>  extern void blk_clear_pm_only(struct request_queue *q);
> -- 
> 2.28.0
> 

Please give me feedback if any further changes are needed for this to be applied. :)

---
Changheun Lee


* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
  2021-04-06  1:31     ` [RESEND PATCH v5 1/2] bio: limit bio max size Changheun Lee
@ 2021-04-06  7:32       ` Greg KH
       [not found]         ` <CGME20210407003345epcas1p21376df37d3dfe933684139d3beaf9c51@epcas1p2.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Greg KH @ 2021-04-06  7:32 UTC (permalink / raw)
  To: Changheun Lee
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	linux-block, linux-kernel, ming.lei, osandov, patchwork-bot, tj,
	tom.leiming, jisoo2146.oh, junho89.kim, mj0123.lee,
	seunghwan.hyun, sookwan7.kim, woosung2.lee, yt0928.kim

On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > bio size can grow up to 4GB when muli-page bvec is enabled.
> > but sometimes it would lead to inefficient behaviors.
> > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > all pages for 32MB would be merged to a bio structure if the pages
> > physical addresses are contiguous. it makes some delay to submit
> > until merge complete. bio max size should be limited to a proper size.
> > 
> > When 32MB chunk read with direct I/O option is coming from userspace,
> > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > 
> >  | bio merge for 32MB. total 8,192 pages are merged.
> >  | total elapsed time is over 2ms.
> >  |------------------ ... ----------------------->|
> >                                                  | 8,192 pages merged a bio.
> >                                                  | at this time, first bio submit is done.
> >                                                  | 1 bio is split to 32 read request and issue.
> >                                                  |--------------->
> >                                                   |--------------->
> >                                                    |--------------->
> >                                                               ......
> >                                                                    |--------------->
> >                                                                     |--------------->|
> >                           total 19ms elapsed to complete 32MB read done from device. |
> > 
> > If bio max size is limited with 1MB, behavior is changed below.
> > 
> >  | bio merge for 1MB. 256 pages are merged for each bio.
> >  | total 32 bio will be made.
> >  | total elapsed time is over 2ms. it's same.
> >  | but, first bio submit timing is fast. about 100us.
> >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> >       | 256 pages merged a bio.
> >       | at this time, first bio submit is done.
> >       | and 1 read request is issued for 1 bio.
> >       |--------------->
> >            |--------------->
> >                 |--------------->
> >                                       ......
> >                                                  |--------------->
> >                                                   |--------------->|
> >         total 17ms elapsed to complete 32MB read done from device. |
> > 
> > As a result, read request issue timing is faster if bio max size is limited.
> > Current kernel behavior with multipage bvec, super large bio can be created.
> > And it lead to delay first I/O request issue.
> > 
> > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > ---
> >  block/bio.c            | 13 ++++++++++++-
> >  include/linux/bio.h    |  2 +-
> >  include/linux/blkdev.h |  3 +++
> >  3 files changed, 16 insertions(+), 2 deletions(-)
> > 
> > diff --git a/block/bio.c b/block/bio.c
> > index 1f2cc1fbe283..c528e1f944c7 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> >  }
> >  EXPORT_SYMBOL(bio_init);
> >  
> > +unsigned int bio_max_size(struct bio *bio)
> > +{
> > +	struct request_queue *q = bio->bi_disk->queue;
> > +
> > +	if (blk_queue_limit_bio_size(q))
> > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > +			<< SECTOR_SHIFT;
> > +
> > +	return UINT_MAX;
> > +}
> > +
> >  /**
> >   * bio_reset - reinitialize a bio
> >   * @bio:	bio to reset
> > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> >  
> >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> >  				*same_page = false;
> >  				return false;
> >  			}
> > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > index 1edda614f7ce..13b6f6562a5b 100644
> > --- a/include/linux/bio.h
> > +++ b/include/linux/bio.h
> > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> >  		return true;
> >  
> > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> >  		return true;
> >  
> >  	return false;
> > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > index f94ee3089e01..3aeab9e7e97b 100644
> > --- a/include/linux/blkdev.h
> > +++ b/include/linux/blkdev.h
> > @@ -621,6 +621,7 @@ struct request_queue {
> >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> >  
> >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > +#define blk_queue_limit_bio_size(q)	\
> > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> >  
> >  extern void blk_set_pm_only(struct request_queue *q);
> >  extern void blk_clear_pm_only(struct request_queue *q);
> > -- 
> > 2.28.0
> > 
> 
> Please feedback to me if more modification is needed to apply. :)

You are adding code that tests for a value to be set, yet you never set
it in this code so why is it needed at all?

thanks,

greg k-h


* Re: [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs
  2021-03-16  7:44     ` [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs Changheun Lee
@ 2021-04-06 21:05       ` Bart Van Assche
       [not found]         ` <CGME20210407013850epcas1p2d305f138c9fa1431abba1ec44a382de9@epcas1p2.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Bart Van Assche @ 2021-04-06 21:05 UTC (permalink / raw)
  To: Changheun Lee, Johannes.Thumshirn, asml.silence, axboe,
	damien.lemoal, gregkh, hch, linux-block, linux-kernel, ming.lei,
	osandov, patchwork-bot, tj, tom.leiming
  Cc: jisoo2146.oh, junho89.kim, mj0123.lee, seunghwan.hyun,
	sookwan7.kim, woosung2.lee, yt0928.kim

On 3/16/21 12:44 AM, Changheun Lee wrote:
> Add limit_bio_size block sysfs node to limit bio size.
> Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size is set.
> And bio max size will be limited by queue max sectors via
> QUEUE_FLAG_LIMIT_BIO_SIZE set.
> 
> Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> ---
>  Documentation/ABI/testing/sysfs-block | 10 ++++++++++
>  Documentation/block/queue-sysfs.rst   |  7 +++++++
>  block/blk-sysfs.c                     |  3 +++
>  3 files changed, 20 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
> index e34cdeeeb9d4..86a7b15410cf 100644
> --- a/Documentation/ABI/testing/sysfs-block
> +++ b/Documentation/ABI/testing/sysfs-block
> @@ -316,3 +316,13 @@ Description:
>  		does not complete in this time then the block driver timeout
>  		handler is invoked. That timeout handler can decide to retry
>  		the request, to fail it or to start a device recovery strategy.
> +
> +What:		/sys/block/<disk>/queue/limit_bio_size
> +Date:		Feb, 2021
> +Contact:	Changheun Lee <nanich.lee@samsung.com>
> +Description:
> +		(RW) Toggle for set/clear QUEUE_FLAG_LIMIT_BIO_SIZE queue flag.
> +		Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size
> +		is set. And bio max size will be limited by queue max sectors.
> +		QUEUE_FLAG_LIMIT_BIO_SIZE will be cleared if limit_bio_size is
> +		cleard. And limit of bio max size will be cleard.
> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> index 2638d3446b79..cd371a821855 100644
> --- a/Documentation/block/queue-sysfs.rst
> +++ b/Documentation/block/queue-sysfs.rst
> @@ -273,4 +273,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
>  do not support zone commands, they will be treated as regular block devices
>  and zoned will report "none".
>  
> +limit_bio_size (RW)
> +-------------------
> +This indicates QUEUE_FLAG_LIMIT_BIO_SIZE queue flag value. And
> +QUEUE_FLAG_LIMIT_BIO_SIZE can be changed via set(1)/clear(0) this node.
> +bio max size will be limited by queue max sectors via set this node. And
> +limit of bio max size will be cleard via clear this node.
> +
>  Jens Axboe <jens.axboe@oracle.com>, February 2009
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index b513f1683af0..840d97f427e6 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -288,6 +288,7 @@ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
>  QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
>  QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
>  QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
> +QUEUE_SYSFS_BIT_FNS(limit_bio_size, LIMIT_BIO_SIZE, 0);
>  #undef QUEUE_SYSFS_BIT_FNS
>  
>  static ssize_t queue_zoned_show(struct request_queue *q, char *page)
> @@ -615,6 +616,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
>  QUEUE_RW_ENTRY(queue_iostats, "iostats");
>  QUEUE_RW_ENTRY(queue_random, "add_random");
>  QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
> +QUEUE_RW_ENTRY(queue_limit_bio_size, "limit_bio_size");
>  
>  static struct attribute *queue_attrs[] = {
>  	&queue_requests_entry.attr,
> @@ -648,6 +650,7 @@ static struct attribute *queue_attrs[] = {
>  	&queue_rq_affinity_entry.attr,
>  	&queue_iostats_entry.attr,
>  	&queue_stable_writes_entry.attr,
> +	&queue_limit_bio_size_entry.attr,
>  	&queue_random_entry.attr,
>  	&queue_poll_entry.attr,
>  	&queue_wc_entry.attr,

Has it been considered to introduce a function to set the BIO size limit
instead of introducing a new sysfs attribute? See also
blk_queue_max_hw_sectors().

Thanks,

Bart.


* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
       [not found]         ` <CGME20210407003345epcas1p21376df37d3dfe933684139d3beaf9c51@epcas1p2.samsung.com>
@ 2021-04-07  0:16           ` Changheun Lee
  2021-04-07  5:05             ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-07  0:16 UTC (permalink / raw)
  To: gregkh
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, nanich.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

> On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > but sometimes it would lead to inefficient behaviors.
> > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > all pages for 32MB would be merged to a bio structure if the pages
> > > physical addresses are contiguous. it makes some delay to submit
> > > until merge complete. bio max size should be limited to a proper size.
> > > 
> > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > 
> > >  | bio merge for 32MB. total 8,192 pages are merged.
> > >  | total elapsed time is over 2ms.
> > >  |------------------ ... ----------------------->|
> > >                                                  | 8,192 pages merged a bio.
> > >                                                  | at this time, first bio submit is done.
> > >                                                  | 1 bio is split to 32 read request and issue.
> > >                                                  |--------------->
> > >                                                   |--------------->
> > >                                                    |--------------->
> > >                                                               ......
> > >                                                                    |--------------->
> > >                                                                     |--------------->|
> > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > 
> > > If bio max size is limited with 1MB, behavior is changed below.
> > > 
> > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > >  | total 32 bio will be made.
> > >  | total elapsed time is over 2ms. it's same.
> > >  | but, first bio submit timing is fast. about 100us.
> > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > >       | 256 pages merged a bio.
> > >       | at this time, first bio submit is done.
> > >       | and 1 read request is issued for 1 bio.
> > >       |--------------->
> > >            |--------------->
> > >                 |--------------->
> > >                                       ......
> > >                                                  |--------------->
> > >                                                   |--------------->|
> > >         total 17ms elapsed to complete 32MB read done from device. |
> > > 
> > > As a result, read request issue timing is faster if bio max size is limited.
> > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > And it lead to delay first I/O request issue.
> > > 
> > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > ---
> > >  block/bio.c            | 13 ++++++++++++-
> > >  include/linux/bio.h    |  2 +-
> > >  include/linux/blkdev.h |  3 +++
> > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/block/bio.c b/block/bio.c
> > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > --- a/block/bio.c
> > > +++ b/block/bio.c
> > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > >  }
> > >  EXPORT_SYMBOL(bio_init);
> > >  
> > > +unsigned int bio_max_size(struct bio *bio)
> > > +{
> > > +	struct request_queue *q = bio->bi_disk->queue;
> > > +
> > > +	if (blk_queue_limit_bio_size(q))
> > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > +			<< SECTOR_SHIFT;
> > > +
> > > +	return UINT_MAX;
> > > +}
> > > +
> > >  /**
> > >   * bio_reset - reinitialize a bio
> > >   * @bio:	bio to reset
> > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > >  
> > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > >  				*same_page = false;
> > >  				return false;
> > >  			}
> > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > index 1edda614f7ce..13b6f6562a5b 100644
> > > --- a/include/linux/bio.h
> > > +++ b/include/linux/bio.h
> > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > >  		return true;
> > >  
> > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > >  		return true;
> > >  
> > >  	return false;
> > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > index f94ee3089e01..3aeab9e7e97b 100644
> > > --- a/include/linux/blkdev.h
> > > +++ b/include/linux/blkdev.h
> > > @@ -621,6 +621,7 @@ struct request_queue {
> > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > >  
> > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > +#define blk_queue_limit_bio_size(q)	\
> > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > >  
> > >  extern void blk_set_pm_only(struct request_queue *q);
> > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > -- 
> > > 2.28.0
> > > 
> > 
> > Please feedback to me if more modification is needed to apply. :)
> 
> You are adding code that tests for a value to be set, yet you never set
> it in this code so why is it needed at all?

This patch is a solution for some inefficient cases of multi-page bvec,
such as the current DIO scenario, so it is not set by default.
It will be set at runtime when a bio size limitation is needed.

> 
> thanks,
> 
> greg k-h

---
Changheun Lee


* Re: [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs
       [not found]         ` <CGME20210407013850epcas1p2d305f138c9fa1431abba1ec44a382de9@epcas1p2.samsung.com>
@ 2021-04-07  1:21           ` Changheun Lee
  2021-04-07  5:43             ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-07  1:21 UTC (permalink / raw)
  To: bvanassche
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, gregkh,
	hch, jisoo2146.oh, junho89.kim, linux-block, linux-kernel,
	ming.lei, mj0123.lee, nanich.lee, osandov, patchwork-bot,
	seunghwan.hyun, sookwan7.kim, tj, tom.leiming, woosung2.lee,
	yt0928.kim

> On 3/16/21 12:44 AM, Changheun Lee wrote:
> > Add limit_bio_size block sysfs node to limit bio size.
> > Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size is set.
> > And bio max size will be limited by queue max sectors via
> > QUEUE_FLAG_LIMIT_BIO_SIZE set.
> > 
> > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > ---
> >  Documentation/ABI/testing/sysfs-block | 10 ++++++++++
> >  Documentation/block/queue-sysfs.rst   |  7 +++++++
> >  block/blk-sysfs.c                     |  3 +++
> >  3 files changed, 20 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
> > index e34cdeeeb9d4..86a7b15410cf 100644
> > --- a/Documentation/ABI/testing/sysfs-block
> > +++ b/Documentation/ABI/testing/sysfs-block
> > @@ -316,3 +316,13 @@ Description:
> >  		does not complete in this time then the block driver timeout
> >  		handler is invoked. That timeout handler can decide to retry
> >  		the request, to fail it or to start a device recovery strategy.
> > +
> > +What:		/sys/block/<disk>/queue/limit_bio_size
> > +Date:		Feb, 2021
> > +Contact:	Changheun Lee <nanich.lee@samsung.com>
> > +Description:
> > +		(RW) Toggle for set/clear QUEUE_FLAG_LIMIT_BIO_SIZE queue flag.
> > +		Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size
> > +		is set. And bio max size will be limited by queue max sectors.
> > +		QUEUE_FLAG_LIMIT_BIO_SIZE will be cleared if limit_bio_size is
> > +		cleard. And limit of bio max size will be cleard.
> > diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> > index 2638d3446b79..cd371a821855 100644
> > --- a/Documentation/block/queue-sysfs.rst
> > +++ b/Documentation/block/queue-sysfs.rst
> > @@ -273,4 +273,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
> >  do not support zone commands, they will be treated as regular block devices
> >  and zoned will report "none".
> >  
> > +limit_bio_size (RW)
> > +-------------------
> > +This indicates QUEUE_FLAG_LIMIT_BIO_SIZE queue flag value. And
> > +QUEUE_FLAG_LIMIT_BIO_SIZE can be changed via set(1)/clear(0) this node.
> > +bio max size will be limited by queue max sectors via set this node. And
> > +limit of bio max size will be cleard via clear this node.
> > +
> >  Jens Axboe <jens.axboe@oracle.com>, February 2009
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index b513f1683af0..840d97f427e6 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -288,6 +288,7 @@ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
> >  QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
> >  QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
> >  QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
> > +QUEUE_SYSFS_BIT_FNS(limit_bio_size, LIMIT_BIO_SIZE, 0);
> >  #undef QUEUE_SYSFS_BIT_FNS
> >  
> >  static ssize_t queue_zoned_show(struct request_queue *q, char *page)
> > @@ -615,6 +616,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
> >  QUEUE_RW_ENTRY(queue_iostats, "iostats");
> >  QUEUE_RW_ENTRY(queue_random, "add_random");
> >  QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
> > +QUEUE_RW_ENTRY(queue_limit_bio_size, "limit_bio_size");
> >  
> >  static struct attribute *queue_attrs[] = {
> >  	&queue_requests_entry.attr,
> > @@ -648,6 +650,7 @@ static struct attribute *queue_attrs[] = {
> >  	&queue_rq_affinity_entry.attr,
> >  	&queue_iostats_entry.attr,
> >  	&queue_stable_writes_entry.attr,
> > +	&queue_limit_bio_size_entry.attr,
> >  	&queue_random_entry.attr,
> >  	&queue_poll_entry.attr,
> >  	&queue_wc_entry.attr,
> 
> Has it been considered to introduce a function to set the BIO size limit
> instead of introducing a new sysfs attribute? See also
> blk_queue_max_hw_sectors().

A function to set it has not been considered yet.
But I think a sysfs attribute should still be supported, because the
setting can differ depending on each system environment, including policy.

> 
> Thanks,
> 
> Bart.

Thanks,

Changheun Lee

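To make Bart's suggestion concrete, a setter in the style of
blk_queue_max_hw_sectors() could be a thin wrapper around the existing
queue-flag helpers. The sketch below is hypothetical and not code from
this series; the name blk_queue_set_limit_bio_size() is invented here to
avoid clashing with the blk_queue_limit_bio_size() test macro from patch
1/2.

	/*
	 * Hypothetical helper sketching Bart's suggestion (not part of
	 * this series): let a driver enable or disable the bio size cap
	 * without a sysfs write, in the spirit of
	 * blk_queue_max_hw_sectors().
	 */
	void blk_queue_set_limit_bio_size(struct request_queue *q, bool limit)
	{
		if (limit)
			blk_queue_flag_set(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
		else
			blk_queue_flag_clear(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
	}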

* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
  2021-04-07  0:16           ` Changheun Lee
@ 2021-04-07  5:05             ` Greg KH
       [not found]               ` <CGME20210407052407epcas1p1d0b5d23ac04a68760f691954e7ada3dd@epcas1p1.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Greg KH @ 2021-04-07  5:05 UTC (permalink / raw)
  To: Changheun Lee
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim,
	tj, tom.leiming, woosung2.lee, yt0928.kim

On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > but sometimes it would lead to inefficient behaviors.
> > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > physical addresses are contiguous. it makes some delay to submit
> > > > until merge complete. bio max size should be limited to a proper size.
> > > > 
> > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > 
> > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > >  | total elapsed time is over 2ms.
> > > >  |------------------ ... ----------------------->|
> > > >                                                  | 8,192 pages merged a bio.
> > > >                                                  | at this time, first bio submit is done.
> > > >                                                  | 1 bio is split to 32 read request and issue.
> > > >                                                  |--------------->
> > > >                                                   |--------------->
> > > >                                                    |--------------->
> > > >                                                               ......
> > > >                                                                    |--------------->
> > > >                                                                     |--------------->|
> > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > 
> > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > 
> > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > >  | total 32 bio will be made.
> > > >  | total elapsed time is over 2ms. it's same.
> > > >  | but, first bio submit timing is fast. about 100us.
> > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > >       | 256 pages merged a bio.
> > > >       | at this time, first bio submit is done.
> > > >       | and 1 read request is issued for 1 bio.
> > > >       |--------------->
> > > >            |--------------->
> > > >                 |--------------->
> > > >                                       ......
> > > >                                                  |--------------->
> > > >                                                   |--------------->|
> > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > 
> > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > And it lead to delay first I/O request issue.
> > > > 
> > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > ---
> > > >  block/bio.c            | 13 ++++++++++++-
> > > >  include/linux/bio.h    |  2 +-
> > > >  include/linux/blkdev.h |  3 +++
> > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/block/bio.c b/block/bio.c
> > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > --- a/block/bio.c
> > > > +++ b/block/bio.c
> > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > >  }
> > > >  EXPORT_SYMBOL(bio_init);
> > > >  
> > > > +unsigned int bio_max_size(struct bio *bio)
> > > > +{
> > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > +
> > > > +	if (blk_queue_limit_bio_size(q))
> > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > +			<< SECTOR_SHIFT;
> > > > +
> > > > +	return UINT_MAX;
> > > > +}
> > > > +
> > > >  /**
> > > >   * bio_reset - reinitialize a bio
> > > >   * @bio:	bio to reset
> > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > >  
> > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > >  				*same_page = false;
> > > >  				return false;
> > > >  			}
> > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > --- a/include/linux/bio.h
> > > > +++ b/include/linux/bio.h
> > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > >  		return true;
> > > >  
> > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > >  		return true;
> > > >  
> > > >  	return false;
> > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > --- a/include/linux/blkdev.h
> > > > +++ b/include/linux/blkdev.h
> > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > >  
> > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > +#define blk_queue_limit_bio_size(q)	\
> > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > >  
> > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > -- 
> > > > 2.28.0
> > > > 
> > > 
> > > Please feedback to me if more modification is needed to apply. :)
> > 
> > You are adding code that tests for a value to be set, yet you never set
> > it in this code so why is it needed at all?
> 
> This patch is a solution for some inefficient case of multipage bvec like
> as current DIO scenario. So it's not set as a default.
> It will be set when bio size limitation is needed in runtime.

Set where?


* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
       [not found]               ` <CGME20210407052407epcas1p1d0b5d23ac04a68760f691954e7ada3dd@epcas1p1.samsung.com>
@ 2021-04-07  5:06                 ` Changheun Lee
  2021-04-07  5:36                   ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-07  5:06 UTC (permalink / raw)
  To: gregkh
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, nanich.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

> On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > but sometimes it would lead to inefficient behaviors.
> > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > 
> > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > 
> > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > >  | total elapsed time is over 2ms.
> > > > >  |------------------ ... ----------------------->|
> > > > >                                                  | 8,192 pages merged a bio.
> > > > >                                                  | at this time, first bio submit is done.
> > > > >                                                  | 1 bio is split to 32 read request and issue.
> > > > >                                                  |--------------->
> > > > >                                                   |--------------->
> > > > >                                                    |--------------->
> > > > >                                                               ......
> > > > >                                                                    |--------------->
> > > > >                                                                     |--------------->|
> > > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > > 
> > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > 
> > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > >  | total 32 bio will be made.
> > > > >  | total elapsed time is over 2ms. it's same.
> > > > >  | but, first bio submit timing is fast. about 100us.
> > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > >       | 256 pages merged a bio.
> > > > >       | at this time, first bio submit is done.
> > > > >       | and 1 read request is issued for 1 bio.
> > > > >       |--------------->
> > > > >            |--------------->
> > > > >                 |--------------->
> > > > >                                       ......
> > > > >                                                  |--------------->
> > > > >                                                   |--------------->|
> > > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > > 
> > > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > > And it lead to delay first I/O request issue.
> > > > > 
> > > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > > ---
> > > > >  block/bio.c            | 13 ++++++++++++-
> > > > >  include/linux/bio.h    |  2 +-
> > > > >  include/linux/blkdev.h |  3 +++
> > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > --- a/block/bio.c
> > > > > +++ b/block/bio.c
> > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > > >  }
> > > > >  EXPORT_SYMBOL(bio_init);
> > > > >  
> > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > +{
> > > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > > +
> > > > > +	if (blk_queue_limit_bio_size(q))
> > > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > +			<< SECTOR_SHIFT;
> > > > > +
> > > > > +	return UINT_MAX;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * bio_reset - reinitialize a bio
> > > > >   * @bio:	bio to reset
> > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > > >  
> > > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > > >  				*same_page = false;
> > > > >  				return false;
> > > > >  			}
> > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > --- a/include/linux/bio.h
> > > > > +++ b/include/linux/bio.h
> > > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > >  		return true;
> > > > >  
> > > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > > >  		return true;
> > > > >  
> > > > >  	return false;
> > > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > > --- a/include/linux/blkdev.h
> > > > > +++ b/include/linux/blkdev.h
> > > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > > >  
> > > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > > +#define blk_queue_limit_bio_size(q)	\
> > > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > > >  
> > > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > > -- 
> > > > > 2.28.0
> > > > > 
> > > > 
> > > > Please feedback to me if more modification is needed to apply. :)
> > > 
> > > You are adding code that tests for a value to be set, yet you never set
> > > it in this code so why is it needed at all?
> > 
> > This patch is a solution for some inefficient case of multipage bvec like
> > as current DIO scenario. So it's not set as a default.
> > It will be set when bio size limitation is needed in runtime.
> 
> Set where?

In my environment, it is set from the init.rc file as below.
"echo 1 > /sys/block/sda/queue/limit_bio_size"


Thanks,

Changheun Lee


* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
  2021-04-07  5:06                 ` Changheun Lee
@ 2021-04-07  5:36                   ` Greg KH
       [not found]                     ` <CGME20210407071241epcas1p2559ea674299b686c9001326894c933bc@epcas1p2.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Greg KH @ 2021-04-07  5:36 UTC (permalink / raw)
  To: Changheun Lee
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim,
	tj, tom.leiming, woosung2.lee, yt0928.kim

On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > > 
> > > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > > 
> > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > >  | total elapsed time is over 2ms.
> > > > > >  |------------------ ... ----------------------->|
> > > > > >                                                  | 8,192 pages merged a bio.
> > > > > >                                                  | at this time, first bio submit is done.
> > > > > >                                                  | 1 bio is split to 32 read request and issue.
> > > > > >                                                  |--------------->
> > > > > >                                                   |--------------->
> > > > > >                                                    |--------------->
> > > > > >                                                               ......
> > > > > >                                                                    |--------------->
> > > > > >                                                                     |--------------->|
> > > > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > > > 
> > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > 
> > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > >  | total 32 bio will be made.
> > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > >       | 256 pages merged a bio.
> > > > > >       | at this time, first bio submit is done.
> > > > > >       | and 1 read request is issued for 1 bio.
> > > > > >       |--------------->
> > > > > >            |--------------->
> > > > > >                 |--------------->
> > > > > >                                       ......
> > > > > >                                                  |--------------->
> > > > > >                                                   |--------------->|
> > > > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > > > 
> > > > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > > > And it lead to delay first I/O request issue.
> > > > > > 
> > > > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > > > ---
> > > > > >  block/bio.c            | 13 ++++++++++++-
> > > > > >  include/linux/bio.h    |  2 +-
> > > > > >  include/linux/blkdev.h |  3 +++
> > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > 
> > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > --- a/block/bio.c
> > > > > > +++ b/block/bio.c
> > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > > > >  }
> > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > >  
> > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > +{
> > > > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > > > +
> > > > > > +	if (blk_queue_limit_bio_size(q))
> > > > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > +			<< SECTOR_SHIFT;
> > > > > > +
> > > > > > +	return UINT_MAX;
> > > > > > +}
> > > > > > +
> > > > > >  /**
> > > > > >   * bio_reset - reinitialize a bio
> > > > > >   * @bio:	bio to reset
> > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > > > >  
> > > > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > > > >  				*same_page = false;
> > > > > >  				return false;
> > > > > >  			}
> > > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > > --- a/include/linux/bio.h
> > > > > > +++ b/include/linux/bio.h
> > > > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > > >  		return true;
> > > > > >  
> > > > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > > > >  		return true;
> > > > > >  
> > > > > >  	return false;
> > > > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > > > --- a/include/linux/blkdev.h
> > > > > > +++ b/include/linux/blkdev.h
> > > > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > > > >  
> > > > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > > > +#define blk_queue_limit_bio_size(q)	\
> > > > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > > > >  
> > > > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > > > -- 
> > > > > > 2.28.0
> > > > > > 
> > > > > 
> > > > > Please feedback to me if more modification is needed to apply. :)
> > > > 
> > > > You are adding code that tests for a value to be set, yet you never set
> > > > it in this code so why is it needed at all?
> > > 
> > > This patch is a solution for some inefficient case of multipage bvec like
> > > as current DIO scenario. So it's not set as a default.
> > > It will be set when bio size limitation is needed in runtime.
> > 
> > Set where?
> 
> In my environment, set it on init.rc file like as below.
> "echo 1 > /sys/block/sda/queue/limit_bio_size"

I do not see any sysfs file in this patch, and why would you ever want
to be forced to manually do this?  The hardware should know the limits
itself, and should automatically tune things like this; do not force a
user to do it, as that's just not going to go well at all.

So if this patch series is forcing a new option to be configured by
sysfs only, that's not acceptable, sorry.

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs
  2021-04-07  1:21           ` Changheun Lee
@ 2021-04-07  5:43             ` Greg KH
  0 siblings, 0 replies; 20+ messages in thread
From: Greg KH @ 2021-04-07  5:43 UTC (permalink / raw)
  To: Changheun Lee
  Cc: bvanassche, Johannes.Thumshirn, asml.silence, axboe,
	damien.lemoal, hch, jisoo2146.oh, junho89.kim, linux-block,
	linux-kernel, ming.lei, mj0123.lee, osandov, patchwork-bot,
	seunghwan.hyun, sookwan7.kim, tj, tom.leiming, woosung2.lee,
	yt0928.kim

On Wed, Apr 07, 2021 at 10:21:17AM +0900, Changheun Lee wrote:
> > On 3/16/21 12:44 AM, Changheun Lee wrote:
> > > Add limit_bio_size block sysfs node to limit bio size.
> > > Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size is set.
> > > And bio max size will be limited by queue max sectors via
> > > QUEUE_FLAG_LIMIT_BIO_SIZE set.
> > > 
> > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > ---
> > >  Documentation/ABI/testing/sysfs-block | 10 ++++++++++
> > >  Documentation/block/queue-sysfs.rst   |  7 +++++++
> > >  block/blk-sysfs.c                     |  3 +++
> > >  3 files changed, 20 insertions(+)
> > > 
> > > diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
> > > index e34cdeeeb9d4..86a7b15410cf 100644
> > > --- a/Documentation/ABI/testing/sysfs-block
> > > +++ b/Documentation/ABI/testing/sysfs-block
> > > @@ -316,3 +316,13 @@ Description:
> > >  		does not complete in this time then the block driver timeout
> > >  		handler is invoked. That timeout handler can decide to retry
> > >  		the request, to fail it or to start a device recovery strategy.
> > > +
> > > +What:		/sys/block/<disk>/queue/limit_bio_size
> > > +Date:		Feb, 2021
> > > +Contact:	Changheun Lee <nanich.lee@samsung.com>
> > > +Description:
> > > +		(RW) Toggle for set/clear QUEUE_FLAG_LIMIT_BIO_SIZE queue flag.
> > > +		Queue flag QUEUE_FLAG_LIMIT_BIO_SIZE will be set if limit_bio_size
> > > +		is set. And bio max size will be limited by queue max sectors.
> > > +		QUEUE_FLAG_LIMIT_BIO_SIZE will be cleared if limit_bio_size is
> > > +		cleared. And limit of bio max size will be cleared.
> > > diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> > > index 2638d3446b79..cd371a821855 100644
> > > --- a/Documentation/block/queue-sysfs.rst
> > > +++ b/Documentation/block/queue-sysfs.rst
> > > @@ -273,4 +273,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
> > >  do not support zone commands, they will be treated as regular block devices
> > >  and zoned will report "none".
> > >  
> > > +limit_bio_size (RW)
> > > +-------------------
> > > +This indicates QUEUE_FLAG_LIMIT_BIO_SIZE queue flag value. And
> > > +QUEUE_FLAG_LIMIT_BIO_SIZE can be changed via set(1)/clear(0) this node.
> > > +bio max size will be limited by queue max sectors via set this node. And
> > > +limit of bio max size will be cleared via clear this node.
> > > +
> > >  Jens Axboe <jens.axboe@oracle.com>, February 2009
> > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > > index b513f1683af0..840d97f427e6 100644
> > > --- a/block/blk-sysfs.c
> > > +++ b/block/blk-sysfs.c
> > > @@ -288,6 +288,7 @@ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
> > >  QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
> > >  QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
> > >  QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
> > > +QUEUE_SYSFS_BIT_FNS(limit_bio_size, LIMIT_BIO_SIZE, 0);
> > >  #undef QUEUE_SYSFS_BIT_FNS
> > >  
> > >  static ssize_t queue_zoned_show(struct request_queue *q, char *page)
> > > @@ -615,6 +616,7 @@ QUEUE_RW_ENTRY(queue_nonrot, "rotational");
> > >  QUEUE_RW_ENTRY(queue_iostats, "iostats");
> > >  QUEUE_RW_ENTRY(queue_random, "add_random");
> > >  QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
> > > +QUEUE_RW_ENTRY(queue_limit_bio_size, "limit_bio_size");
> > >  
> > >  static struct attribute *queue_attrs[] = {
> > >  	&queue_requests_entry.attr,
> > > @@ -648,6 +650,7 @@ static struct attribute *queue_attrs[] = {
> > >  	&queue_rq_affinity_entry.attr,
> > >  	&queue_iostats_entry.attr,
> > >  	&queue_stable_writes_entry.attr,
> > > +	&queue_limit_bio_size_entry.attr,
> > >  	&queue_random_entry.attr,
> > >  	&queue_poll_entry.attr,
> > >  	&queue_wc_entry.attr,
> > 
> > Has it been considered to introduce a function to set the BIO size limit
> > instead of introducing a new sysfs attribute? See also
> > blk_queue_max_hw_sectors().
> 
> A function to set has been not considered yet.
> But sysfs attribute should be supported I think. Because it can be
> different depending on each system environment including policy.

But what tool is now responsible for setting this new value?  Where will
it get that information from?  And why can't the kernel just
automatically set this correctly in the first place without any need for
userspace interaction?
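
For comparison, the setter-style interface Bart points to above might look
something like the sketch below; the helper name and the max_bio_bytes
limits field are hypothetical and not part of the posted series:

/*
 * Hypothetical sketch of the setter Bart refers to, modelled on
 * blk_queue_max_hw_sectors().  Neither the function nor the
 * max_bio_bytes limits field exists in the posted series.
 */
#include <linux/blkdev.h>

void blk_queue_max_bio_bytes(struct request_queue *q, unsigned int max_bytes)
{
	q->limits.max_bio_bytes = max_bytes;	/* assumed new queue_limits field */
}

A driver that benefits from capped bios could then call such a helper while
setting up its queue, so the value would come from the driver's knowledge of
the hardware rather than from a sysfs write by userspace.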

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
       [not found]                     ` <CGME20210407071241epcas1p2559ea674299b686c9001326894c933bc@epcas1p2.samsung.com>
@ 2021-04-07  6:55                       ` Changheun Lee
  2021-04-07  7:40                         ` Greg KH
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-07  6:55 UTC (permalink / raw)
  To: gregkh
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, nanich.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

> On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > > > 
> > > > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > > > 
> > > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > > >  | total elapsed time is over 2ms.
> > > > > > >  |------------------ ... ----------------------->|
> > > > > > >                                                  | 8,192 pages merged a bio.
> > > > > > >                                                  | at this time, first bio submit is done.
> > > > > > >                                                  | 1 bio is split to 32 read request and issue.
> > > > > > >                                                  |--------------->
> > > > > > >                                                   |--------------->
> > > > > > >                                                    |--------------->
> > > > > > >                                                               ......
> > > > > > >                                                                    |--------------->
> > > > > > >                                                                     |--------------->|
> > > > > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > > > > 
> > > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > > 
> > > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > > >  | total 32 bio will be made.
> > > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > > >       | 256 pages merged a bio.
> > > > > > >       | at this time, first bio submit is done.
> > > > > > >       | and 1 read request is issued for 1 bio.
> > > > > > >       |--------------->
> > > > > > >            |--------------->
> > > > > > >                 |--------------->
> > > > > > >                                       ......
> > > > > > >                                                  |--------------->
> > > > > > >                                                   |--------------->|
> > > > > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > > > > 
> > > > > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > > > > And it lead to delay first I/O request issue.
> > > > > > > 
> > > > > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > > > > ---
> > > > > > >  block/bio.c            | 13 ++++++++++++-
> > > > > > >  include/linux/bio.h    |  2 +-
> > > > > > >  include/linux/blkdev.h |  3 +++
> > > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > > --- a/block/bio.c
> > > > > > > +++ b/block/bio.c
> > > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > > >  
> > > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > > +{
> > > > > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > > > > +
> > > > > > > +	if (blk_queue_limit_bio_size(q))
> > > > > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > > +			<< SECTOR_SHIFT;
> > > > > > > +
> > > > > > > +	return UINT_MAX;
> > > > > > > +}
> > > > > > > +
> > > > > > >  /**
> > > > > > >   * bio_reset - reinitialize a bio
> > > > > > >   * @bio:	bio to reset
> > > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > > > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > > > > >  
> > > > > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > > > > >  				*same_page = false;
> > > > > > >  				return false;
> > > > > > >  			}
> > > > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > > > --- a/include/linux/bio.h
> > > > > > > +++ b/include/linux/bio.h
> > > > > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > > > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > > > >  		return true;
> > > > > > >  
> > > > > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > > > > >  		return true;
> > > > > > >  
> > > > > > >  	return false;
> > > > > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > > > > --- a/include/linux/blkdev.h
> > > > > > > +++ b/include/linux/blkdev.h
> > > > > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > > > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > > > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > > > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > > > > >  
> > > > > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > > > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > > > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > > > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > > > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > > > > +#define blk_queue_limit_bio_size(q)	\
> > > > > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > > > > >  
> > > > > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > > > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > > > > -- 
> > > > > > > 2.28.0
> > > > > > > 
> > > > > > 
> > > > > > Please feedback to me if more modification is needed to apply. :)
> > > > > 
> > > > > You are adding code that tests for a value to be set, yet you never set
> > > > > it in this code so why is it needed at all?
> > > > 
> > > > This patch is a solution for some inefficient case of multipage bvec like
> > > > as current DIO scenario. So it's not set as a default.
> > > > It will be set when bio size limitation is needed in runtime.
> > > 
> > > Set where?
> > 
> > In my environment, set it on init.rc file like as below.
> > "echo 1 > /sys/block/sda/queue/limit_bio_size"
> 
> I do not see any sysfs file in this patch, and why would you ever want
> to be forced to manually do this?  The hardware should know the limits
> itself, and should automatically tune things like this, do not force a
> user to do it as that's just not going to go well at all.

The sysfs patch was sent as "[RESEND,v5,2/2] bio: add limit_bio_size sysfs".
Actually I suggested a constant 1MB value to limit bio size at first,
but I got feedback on patchwork that the patch would be better if it were
optional and took a meaningful value from the device queue.
I think there are some differences between system environments.

But multipage bvec clearly introduces inefficient behavior in some cases,
so it will show up in several system environments.
For now I think providing this patch as an option is better, so that it can
be selected according to each system environment and policy.

Please revisit applying this patch.

> 
> So if this patch series is forcing a new option to be configured by
> sysfs only, that's not acceptable, sorry.

If it is not acceptable ever with current, may I progress review again
with default enabled?

> 
> greg k-h

Thanks,

Changheun Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND PATCH v5 1/2] bio: limit bio max size
  2021-04-07  6:55                       ` Changheun Lee
@ 2021-04-07  7:40                         ` Greg KH
       [not found]                           ` <CGME20210407094610epcas1p472207e8d3ca0e5e697974c993a2a34f7@epcas1p4.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Greg KH @ 2021-04-07  7:40 UTC (permalink / raw)
  To: Changheun Lee
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim,
	tj, tom.leiming, woosung2.lee, yt0928.kim

On Wed, Apr 07, 2021 at 03:55:07PM +0900, Changheun Lee wrote:
> > On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > > > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > > > > 
> > > > > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > > > > 
> > > > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > > > >  | total elapsed time is over 2ms.
> > > > > > > >  |------------------ ... ----------------------->|
> > > > > > > >                                                  | 8,192 pages merged a bio.
> > > > > > > >                                                  | at this time, first bio submit is done.
> > > > > > > >                                                  | 1 bio is split to 32 read request and issue.
> > > > > > > >                                                  |--------------->
> > > > > > > >                                                   |--------------->
> > > > > > > >                                                    |--------------->
> > > > > > > >                                                               ......
> > > > > > > >                                                                    |--------------->
> > > > > > > >                                                                     |--------------->|
> > > > > > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > > > > > 
> > > > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > > > 
> > > > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > > > >  | total 32 bio will be made.
> > > > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > > > >       | 256 pages merged a bio.
> > > > > > > >       | at this time, first bio submit is done.
> > > > > > > >       | and 1 read request is issued for 1 bio.
> > > > > > > >       |--------------->
> > > > > > > >            |--------------->
> > > > > > > >                 |--------------->
> > > > > > > >                                       ......
> > > > > > > >                                                  |--------------->
> > > > > > > >                                                   |--------------->|
> > > > > > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > > > > > 
> > > > > > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > > > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > > > > > And it lead to delay first I/O request issue.
> > > > > > > > 
> > > > > > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > > > > > ---
> > > > > > > >  block/bio.c            | 13 ++++++++++++-
> > > > > > > >  include/linux/bio.h    |  2 +-
> > > > > > > >  include/linux/blkdev.h |  3 +++
> > > > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > > > --- a/block/bio.c
> > > > > > > > +++ b/block/bio.c
> > > > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > > > > > >  }
> > > > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > > > >  
> > > > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > > > +{
> > > > > > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > > > > > +
> > > > > > > > +	if (blk_queue_limit_bio_size(q))
> > > > > > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > > > +			<< SECTOR_SHIFT;
> > > > > > > > +
> > > > > > > > +	return UINT_MAX;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  /**
> > > > > > > >   * bio_reset - reinitialize a bio
> > > > > > > >   * @bio:	bio to reset
> > > > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > > > > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > > > > > >  
> > > > > > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > > > > > >  				*same_page = false;
> > > > > > > >  				return false;
> > > > > > > >  			}
> > > > > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > > > > --- a/include/linux/bio.h
> > > > > > > > +++ b/include/linux/bio.h
> > > > > > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > > > > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > > > > >  		return true;
> > > > > > > >  
> > > > > > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > > > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > > > > > >  		return true;
> > > > > > > >  
> > > > > > > >  	return false;
> > > > > > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > > > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > > > > > --- a/include/linux/blkdev.h
> > > > > > > > +++ b/include/linux/blkdev.h
> > > > > > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > > > > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > > > > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > > > > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > > > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > > > > > >  
> > > > > > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > > > > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > > > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > > > > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > > > > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > > > > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > > > > > +#define blk_queue_limit_bio_size(q)	\
> > > > > > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > > > > > >  
> > > > > > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > > > > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > > > > > -- 
> > > > > > > > 2.28.0
> > > > > > > > 
> > > > > > > 
> > > > > > > Please feedback to me if more modification is needed to apply. :)
> > > > > > 
> > > > > > You are adding code that tests for a value to be set, yet you never set
> > > > > > it in this code so why is it needed at all?
> > > > > 
> > > > > This patch is a solution for some inefficient case of multipage bvec like
> > > > > as current DIO scenario. So it's not set as a default.
> > > > > It will be set when bio size limitation is needed in runtime.
> > > > 
> > > > Set where?
> > > 
> > > In my environment, set it on init.rc file like as below.
> > > "echo 1 > /sys/block/sda/queue/limit_bio_size"
> > 
> > I do not see any sysfs file in this patch, and why would you ever want
> > to be forced to manually do this?  The hardware should know the limits
> > itself, and should automatically tune things like this, do not force a
> > user to do it as that's just not going to go well at all.
> 
> Patch for sysfs is sent "[RESEND,v5,2/2] bio: add limit_bio_size sysfs".
> Actually I just suggested constant - 1MB - value to limit bio size at first.
> But I got a feedback that patch will be better if it's optional, and
> getting meaningful value from device queue on patchwork.
> There are some differences for each system environment I think.
> 
> But there are inefficient logic obviously by applying of multipage bvec.
> So it will be shown in several system environment.
> Currently providing this patch as a option would be better to select
> according to each system environment, and policy I think.
> 
> Please, revisit applying this patch.
> 
> > 
> > So if this patch series is forcing a new option to be configured by
> > sysfs only, that's not acceptable, sorry.
> 
> If it is not acceptable ever with current, may I progress review again
> with default enabled?

I am sorry, I can not parse this, can you rephrase this?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
       [not found]                           ` <CGME20210407094610epcas1p472207e8d3ca0e5e697974c993a2a34f7@epcas1p4.samsung.com>
@ 2021-04-07  9:28                             ` Changheun Lee
  2021-04-07 10:27                               ` Damien Le Moal
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-07  9:28 UTC (permalink / raw)
  To: gregkh
  Cc: Johannes.Thumshirn, asml.silence, axboe, damien.lemoal, hch,
	jisoo2146.oh, junho89.kim, linux-block, linux-kernel, ming.lei,
	mj0123.lee, nanich.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

> On Wed, Apr 07, 2021 at 03:55:07PM +0900, Changheun Lee wrote:
> > > On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
> > > > > On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
> > > > > > > On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
> > > > > > > > > bio size can grow up to 4GB when muli-page bvec is enabled.
> > > > > > > > > but sometimes it would lead to inefficient behaviors.
> > > > > > > > > in case of large chunk direct I/O, - 32MB chunk read in user space -
> > > > > > > > > all pages for 32MB would be merged to a bio structure if the pages
> > > > > > > > > physical addresses are contiguous. it makes some delay to submit
> > > > > > > > > until merge complete. bio max size should be limited to a proper size.
> > > > > > > > > 
> > > > > > > > > When 32MB chunk read with direct I/O option is coming from userspace,
> > > > > > > > > kernel behavior is below now in do_direct_IO() loop. it's timeline.
> > > > > > > > > 
> > > > > > > > >  | bio merge for 32MB. total 8,192 pages are merged.
> > > > > > > > >  | total elapsed time is over 2ms.
> > > > > > > > >  |------------------ ... ----------------------->|
> > > > > > > > >                                                  | 8,192 pages merged a bio.
> > > > > > > > >                                                  | at this time, first bio submit is done.
> > > > > > > > >                                                  | 1 bio is split to 32 read request and issue.
> > > > > > > > >                                                  |--------------->
> > > > > > > > >                                                   |--------------->
> > > > > > > > >                                                    |--------------->
> > > > > > > > >                                                               ......
> > > > > > > > >                                                                    |--------------->
> > > > > > > > >                                                                     |--------------->|
> > > > > > > > >                           total 19ms elapsed to complete 32MB read done from device. |
> > > > > > > > > 
> > > > > > > > > If bio max size is limited with 1MB, behavior is changed below.
> > > > > > > > > 
> > > > > > > > >  | bio merge for 1MB. 256 pages are merged for each bio.
> > > > > > > > >  | total 32 bio will be made.
> > > > > > > > >  | total elapsed time is over 2ms. it's same.
> > > > > > > > >  | but, first bio submit timing is fast. about 100us.
> > > > > > > > >  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
> > > > > > > > >       | 256 pages merged a bio.
> > > > > > > > >       | at this time, first bio submit is done.
> > > > > > > > >       | and 1 read request is issued for 1 bio.
> > > > > > > > >       |--------------->
> > > > > > > > >            |--------------->
> > > > > > > > >                 |--------------->
> > > > > > > > >                                       ......
> > > > > > > > >                                                  |--------------->
> > > > > > > > >                                                   |--------------->|
> > > > > > > > >         total 17ms elapsed to complete 32MB read done from device. |
> > > > > > > > > 
> > > > > > > > > As a result, read request issue timing is faster if bio max size is limited.
> > > > > > > > > Current kernel behavior with multipage bvec, super large bio can be created.
> > > > > > > > > And it lead to delay first I/O request issue.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
> > > > > > > > > ---
> > > > > > > > >  block/bio.c            | 13 ++++++++++++-
> > > > > > > > >  include/linux/bio.h    |  2 +-
> > > > > > > > >  include/linux/blkdev.h |  3 +++
> > > > > > > > >  3 files changed, 16 insertions(+), 2 deletions(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/block/bio.c b/block/bio.c
> > > > > > > > > index 1f2cc1fbe283..c528e1f944c7 100644
> > > > > > > > > --- a/block/bio.c
> > > > > > > > > +++ b/block/bio.c
> > > > > > > > > @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
> > > > > > > > >  }
> > > > > > > > >  EXPORT_SYMBOL(bio_init);
> > > > > > > > >  
> > > > > > > > > +unsigned int bio_max_size(struct bio *bio)
> > > > > > > > > +{
> > > > > > > > > +	struct request_queue *q = bio->bi_disk->queue;
> > > > > > > > > +
> > > > > > > > > +	if (blk_queue_limit_bio_size(q))
> > > > > > > > > +		return blk_queue_get_max_sectors(q, bio_op(bio))
> > > > > > > > > +			<< SECTOR_SHIFT;
> > > > > > > > > +
> > > > > > > > > +	return UINT_MAX;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  /**
> > > > > > > > >   * bio_reset - reinitialize a bio
> > > > > > > > >   * @bio:	bio to reset
> > > > > > > > > @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
> > > > > > > > >  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
> > > > > > > > >  
> > > > > > > > >  		if (page_is_mergeable(bv, page, len, off, same_page)) {
> > > > > > > > > -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
> > > > > > > > > +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
> > > > > > > > >  				*same_page = false;
> > > > > > > > >  				return false;
> > > > > > > > >  			}
> > > > > > > > > diff --git a/include/linux/bio.h b/include/linux/bio.h
> > > > > > > > > index 1edda614f7ce..13b6f6562a5b 100644
> > > > > > > > > --- a/include/linux/bio.h
> > > > > > > > > +++ b/include/linux/bio.h
> > > > > > > > > @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
> > > > > > > > >  	if (bio->bi_vcnt >= bio->bi_max_vecs)
> > > > > > > > >  		return true;
> > > > > > > > >  
> > > > > > > > > -	if (bio->bi_iter.bi_size > UINT_MAX - len)
> > > > > > > > > +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
> > > > > > > > >  		return true;
> > > > > > > > >  
> > > > > > > > >  	return false;
> > > > > > > > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> > > > > > > > > index f94ee3089e01..3aeab9e7e97b 100644
> > > > > > > > > --- a/include/linux/blkdev.h
> > > > > > > > > +++ b/include/linux/blkdev.h
> > > > > > > > > @@ -621,6 +621,7 @@ struct request_queue {
> > > > > > > > >  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
> > > > > > > > >  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
> > > > > > > > >  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
> > > > > > > > > +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
> > > > > > > > >  
> > > > > > > > >  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
> > > > > > > > >  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
> > > > > > > > > @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
> > > > > > > > >  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
> > > > > > > > >  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
> > > > > > > > >  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
> > > > > > > > > +#define blk_queue_limit_bio_size(q)	\
> > > > > > > > > +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
> > > > > > > > >  
> > > > > > > > >  extern void blk_set_pm_only(struct request_queue *q);
> > > > > > > > >  extern void blk_clear_pm_only(struct request_queue *q);
> > > > > > > > > -- 
> > > > > > > > > 2.28.0
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > Please feedback to me if more modification is needed to apply. :)
> > > > > > > 
> > > > > > > You are adding code that tests for a value to be set, yet you never set
> > > > > > > it in this code so why is it needed at all?
> > > > > > 
> > > > > > This patch is a solution for some inefficient case of multipage bvec like
> > > > > > as current DIO scenario. So it's not set as a default.
> > > > > > It will be set when bio size limitation is needed in runtime.
> > > > > 
> > > > > Set where?
> > > > 
> > > > In my environment, set it on init.rc file like as below.
> > > > "echo 1 > /sys/block/sda/queue/limit_bio_size"
> > > 
> > > I do not see any sysfs file in this patch, and why would you ever want
> > > to be forced to manually do this?  The hardware should know the limits
> > > itself, and should automatically tune things like this, do not force a
> > > user to do it as that's just not going to go well at all.
> > 
> > Patch for sysfs is sent "[RESEND,v5,2/2] bio: add limit_bio_size sysfs".
> > Actually I just suggested constant - 1MB - value to limit bio size at first.
> > But I got a feedback that patch will be better if it's optional, and
> > getting meaningful value from device queue on patchwork.
> > There are some differences for each system environment I think.
> > 
> > But there are inefficient logic obviously by applying of multipage bvec.
> > So it will be shown in several system environment.
> > Currently providing this patch as a option would be better to select
> > according to each system environment, and policy I think.
> > 
> > Please, revisit applying this patch.
> > 
> > > 
> > > So if this patch series is forcing a new option to be configured by
> > > sysfs only, that's not acceptable, sorry.
> > 
> > If it is not acceptable ever with current, may I progress review again
> > with default enabled?
> 
> I am sorry, I can not parse this, can you rephrase this?
> 
> thanks,
> 
> greg k-h
> 

I'll prepare a new patch as you recommend. It will add setting of
limit_bio_size automatically when the queue max sectors value is determined.
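
One possible shape of that follow-up, sketched here purely as an assumption
about the next version rather than posted code, would be for the block core
to set the flag whenever a driver declares its hardware limit, e.g. in
block/blk-settings.c:

/*
 * Hypothetical sketch only, not the posted code: have the core set the
 * flag whenever a driver declares its hardware limit, so no sysfs write
 * from userspace is needed.
 */
void blk_queue_max_hw_sectors(struct request_queue *q,
			      unsigned int max_hw_sectors)
{
	/* ... existing max_hw_sectors/max_sectors bookkeeping unchanged ... */

	/* opt the queue into bio-size capping automatically */
	blk_queue_flag_set(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
}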


Thanks,

Changheun Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
  2021-04-07  9:28                             ` [RESEND,v5,1/2] " Changheun Lee
@ 2021-04-07 10:27                               ` Damien Le Moal
  2021-04-09 14:47                                 ` Bart Van Assche
  0 siblings, 1 reply; 20+ messages in thread
From: Damien Le Moal @ 2021-04-07 10:27 UTC (permalink / raw)
  To: Changheun Lee, gregkh
  Cc: Johannes Thumshirn, asml.silence, axboe, hch, jisoo2146.oh,
	junho89.kim, linux-block, linux-kernel, ming.lei, mj0123.lee,
	osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim, tj,
	tom.leiming, woosung2.lee, yt0928.kim

On 2021/04/07 18:46, Changheun Lee wrote:
>> On Wed, Apr 07, 2021 at 03:55:07PM +0900, Changheun Lee wrote:
>>>> On Wed, Apr 07, 2021 at 02:06:33PM +0900, Changheun Lee wrote:
>>>>>> On Wed, Apr 07, 2021 at 09:16:12AM +0900, Changheun Lee wrote:
>>>>>>>> On Tue, Apr 06, 2021 at 10:31:28AM +0900, Changheun Lee wrote:
>>>>>>>>>> bio size can grow up to 4GB when muli-page bvec is enabled.
>>>>>>>>>> but sometimes it would lead to inefficient behaviors.
>>>>>>>>>> in case of large chunk direct I/O, - 32MB chunk read in user space -
>>>>>>>>>> all pages for 32MB would be merged to a bio structure if the pages
>>>>>>>>>> physical addresses are contiguous. it makes some delay to submit
>>>>>>>>>> until merge complete. bio max size should be limited to a proper size.
>>>>>>>>>>
>>>>>>>>>> When 32MB chunk read with direct I/O option is coming from userspace,
>>>>>>>>>> kernel behavior is below now in do_direct_IO() loop. it's timeline.
>>>>>>>>>>
>>>>>>>>>>  | bio merge for 32MB. total 8,192 pages are merged.
>>>>>>>>>>  | total elapsed time is over 2ms.
>>>>>>>>>>  |------------------ ... ----------------------->|
>>>>>>>>>>                                                  | 8,192 pages merged a bio.
>>>>>>>>>>                                                  | at this time, first bio submit is done.
>>>>>>>>>>                                                  | 1 bio is split to 32 read request and issue.
>>>>>>>>>>                                                  |--------------->
>>>>>>>>>>                                                   |--------------->
>>>>>>>>>>                                                    |--------------->
>>>>>>>>>>                                                               ......
>>>>>>>>>>                                                                    |--------------->
>>>>>>>>>>                                                                     |--------------->|
>>>>>>>>>>                           total 19ms elapsed to complete 32MB read done from device. |
>>>>>>>>>>
>>>>>>>>>> If bio max size is limited with 1MB, behavior is changed below.
>>>>>>>>>>
>>>>>>>>>>  | bio merge for 1MB. 256 pages are merged for each bio.
>>>>>>>>>>  | total 32 bio will be made.
>>>>>>>>>>  | total elapsed time is over 2ms. it's same.
>>>>>>>>>>  | but, first bio submit timing is fast. about 100us.
>>>>>>>>>>  |--->|--->|--->|---> ... -->|--->|--->|--->|--->|
>>>>>>>>>>       | 256 pages merged a bio.
>>>>>>>>>>       | at this time, first bio submit is done.
>>>>>>>>>>       | and 1 read request is issued for 1 bio.
>>>>>>>>>>       |--------------->
>>>>>>>>>>            |--------------->
>>>>>>>>>>                 |--------------->
>>>>>>>>>>                                       ......
>>>>>>>>>>                                                  |--------------->
>>>>>>>>>>                                                   |--------------->|
>>>>>>>>>>         total 17ms elapsed to complete 32MB read done from device. |
>>>>>>>>>>
>>>>>>>>>> As a result, read request issue timing is faster if bio max size is limited.
>>>>>>>>>> Current kernel behavior with multipage bvec, super large bio can be created.
>>>>>>>>>> And it lead to delay first I/O request issue.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Changheun Lee <nanich.lee@samsung.com>
>>>>>>>>>> ---
>>>>>>>>>>  block/bio.c            | 13 ++++++++++++-
>>>>>>>>>>  include/linux/bio.h    |  2 +-
>>>>>>>>>>  include/linux/blkdev.h |  3 +++
>>>>>>>>>>  3 files changed, 16 insertions(+), 2 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/block/bio.c b/block/bio.c
>>>>>>>>>> index 1f2cc1fbe283..c528e1f944c7 100644
>>>>>>>>>> --- a/block/bio.c
>>>>>>>>>> +++ b/block/bio.c
>>>>>>>>>> @@ -287,6 +287,17 @@ void bio_init(struct bio *bio, struct bio_vec *table,
>>>>>>>>>>  }
>>>>>>>>>>  EXPORT_SYMBOL(bio_init);
>>>>>>>>>>  
>>>>>>>>>> +unsigned int bio_max_size(struct bio *bio)
>>>>>>>>>> +{
>>>>>>>>>> +	struct request_queue *q = bio->bi_disk->queue;
>>>>>>>>>> +
>>>>>>>>>> +	if (blk_queue_limit_bio_size(q))
>>>>>>>>>> +		return blk_queue_get_max_sectors(q, bio_op(bio))
>>>>>>>>>> +			<< SECTOR_SHIFT;
>>>>>>>>>> +
>>>>>>>>>> +	return UINT_MAX;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>  /**
>>>>>>>>>>   * bio_reset - reinitialize a bio
>>>>>>>>>>   * @bio:	bio to reset
>>>>>>>>>> @@ -877,7 +888,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
>>>>>>>>>>  		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
>>>>>>>>>>  
>>>>>>>>>>  		if (page_is_mergeable(bv, page, len, off, same_page)) {
>>>>>>>>>> -			if (bio->bi_iter.bi_size > UINT_MAX - len) {
>>>>>>>>>> +			if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
>>>>>>>>>>  				*same_page = false;
>>>>>>>>>>  				return false;
>>>>>>>>>>  			}
>>>>>>>>>> diff --git a/include/linux/bio.h b/include/linux/bio.h
>>>>>>>>>> index 1edda614f7ce..13b6f6562a5b 100644
>>>>>>>>>> --- a/include/linux/bio.h
>>>>>>>>>> +++ b/include/linux/bio.h
>>>>>>>>>> @@ -113,7 +113,7 @@ static inline bool bio_full(struct bio *bio, unsigned len)
>>>>>>>>>>  	if (bio->bi_vcnt >= bio->bi_max_vecs)
>>>>>>>>>>  		return true;
>>>>>>>>>>  
>>>>>>>>>> -	if (bio->bi_iter.bi_size > UINT_MAX - len)
>>>>>>>>>> +	if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
>>>>>>>>>>  		return true;
>>>>>>>>>>  
>>>>>>>>>>  	return false;
>>>>>>>>>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>>>>>>>>>> index f94ee3089e01..3aeab9e7e97b 100644
>>>>>>>>>> --- a/include/linux/blkdev.h
>>>>>>>>>> +++ b/include/linux/blkdev.h
>>>>>>>>>> @@ -621,6 +621,7 @@ struct request_queue {
>>>>>>>>>>  #define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
>>>>>>>>>>  #define QUEUE_FLAG_HCTX_ACTIVE	28	/* at least one blk-mq hctx is active */
>>>>>>>>>>  #define QUEUE_FLAG_NOWAIT       29	/* device supports NOWAIT */
>>>>>>>>>> +#define QUEUE_FLAG_LIMIT_BIO_SIZE 30	/* limit bio size */
>>>>>>>>>>  
>>>>>>>>>>  #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>>>>>>>>>>  				 (1 << QUEUE_FLAG_SAME_COMP) |		\
>>>>>>>>>> @@ -667,6 +668,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
>>>>>>>>>>  #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
>>>>>>>>>>  #define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
>>>>>>>>>>  #define blk_queue_nowait(q)	test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
>>>>>>>>>> +#define blk_queue_limit_bio_size(q)	\
>>>>>>>>>> +	test_bit(QUEUE_FLAG_LIMIT_BIO_SIZE, &(q)->queue_flags)
>>>>>>>>>>  
>>>>>>>>>>  extern void blk_set_pm_only(struct request_queue *q);
>>>>>>>>>>  extern void blk_clear_pm_only(struct request_queue *q);
>>>>>>>>>> -- 
>>>>>>>>>> 2.28.0
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Please feedback to me if more modification is needed to apply. :)
>>>>>>>>
>>>>>>>> You are adding code that tests for a value to be set, yet you never set
>>>>>>>> it in this code so why is it needed at all?
>>>>>>>
>>>>>>> This patch is a solution for some inefficient case of multipage bvec like
>>>>>>> as current DIO scenario. So it's not set as a default.
>>>>>>> It will be set when bio size limitation is needed in runtime.
>>>>>>
>>>>>> Set where?
>>>>>
>>>>> In my environment, set it on init.rc file like as below.
>>>>> "echo 1 > /sys/block/sda/queue/limit_bio_size"
>>>>
>>>> I do not see any sysfs file in this patch, and why would you ever want
>>>> to be forced to manually do this?  The hardware should know the limits
>>>> itself, and should automatically tune things like this, do not force a
>>>> user to do it as that's just not going to go well at all.
>>>
>>> Patch for sysfs is sent "[RESEND,v5,2/2] bio: add limit_bio_size sysfs".
>>> Actually I just suggested constant - 1MB - value to limit bio size at first.
>>> But I got a feedback that patch will be better if it's optional, and
>>> getting meaningful value from device queue on patchwork.
>>> There are some differences for each system environment I think.
>>>
>>> But there are inefficient logic obviously by applying of multipage bvec.
>>> So it will be shown in several system environment.
>>> Currently providing this patch as a option would be better to select
>>> according to each system environment, and policy I think.
>>>
>>> Please, revisit applying this patch.
>>>
>>>>
>>>> So if this patch series is forcing a new option to be configured by
>>>> sysfs only, that's not acceptable, sorry.
>>>
>>> If it is not acceptable ever with current, may I progress review again
>>> with default enabled?
>>
>> I am sorry, I can not parse this, can you rephrase this?
>>
>> thanks,
>>
>> greg k-h
>>
> 
> I'll prepare new patch as you recommand. It will be added setting of
> limit_bio_size automatically when queue max sectors is determined.

Please do that in the driver for the HW that benefits from it. Do not do this
for all block devices.
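
A driver-side opt-in along those lines could look roughly like the sketch
below; the ->slave_configure hook and the driver are placeholders, while
blk_queue_flag_set() is an existing helper and QUEUE_FLAG_LIMIT_BIO_SIZE
comes from this series:

/*
 * Hypothetical driver-side opt-in: only the hardware that benefits sets
 * the flag while configuring its queue, instead of changing behavior for
 * all block devices.
 */
#include <linux/blkdev.h>
#include <scsi/scsi_device.h>

static int example_slave_configure(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;

	blk_queue_flag_set(QUEUE_FLAG_LIMIT_BIO_SIZE, q);
	return 0;
}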

> 
> 
> Thanks,
> 
> Changheun Lee
> 


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
  2021-04-07 10:27                               ` Damien Le Moal
@ 2021-04-09 14:47                                 ` Bart Van Assche
  2021-04-11 22:13                                   ` Damien Le Moal
  0 siblings, 1 reply; 20+ messages in thread
From: Bart Van Assche @ 2021-04-09 14:47 UTC (permalink / raw)
  To: Damien Le Moal, Changheun Lee, gregkh
  Cc: Johannes Thumshirn, asml.silence, axboe, hch, jisoo2146.oh,
	junho89.kim, linux-block, linux-kernel, ming.lei, mj0123.lee,
	osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim, tj,
	tom.leiming, woosung2.lee, yt0928.kim

On 4/7/21 3:27 AM, Damien Le Moal wrote:
> On 2021/04/07 18:46, Changheun Lee wrote:
>> I'll prepare new patch as you recommand. It will be added setting of
>> limit_bio_size automatically when queue max sectors is determined.
> 
> Please do that in the driver for the HW that benefits from it. Do not do this
> for all block devices.

Hmm ... is it ever useful to build a bio with a size that exceeds 
max_hw_sectors when submitting a bio directly to a block device, or in 
other words, if no stacked block driver sits between the submitter and 
the block device? Am I perhaps missing something?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
  2021-04-09 14:47                                 ` Bart Van Assche
@ 2021-04-11 22:13                                   ` Damien Le Moal
  2021-04-12  3:23                                     ` Ming Lei
  0 siblings, 1 reply; 20+ messages in thread
From: Damien Le Moal @ 2021-04-11 22:13 UTC (permalink / raw)
  To: Bart Van Assche, Changheun Lee, gregkh
  Cc: Johannes Thumshirn, asml.silence, axboe, hch, jisoo2146.oh,
	junho89.kim, linux-block, linux-kernel, ming.lei, mj0123.lee,
	osandov, patchwork-bot, seunghwan.hyun, sookwan7.kim, tj,
	tom.leiming, woosung2.lee, yt0928.kim

On 2021/04/09 23:47, Bart Van Assche wrote:
> On 4/7/21 3:27 AM, Damien Le Moal wrote:
>> On 2021/04/07 18:46, Changheun Lee wrote:
>>> I'll prepare new patch as you recommand. It will be added setting of
>>> limit_bio_size automatically when queue max sectors is determined.
>>
>> Please do that in the driver for the HW that benefits from it. Do not do this
>> for all block devices.
> 
> Hmm ... is it ever useful to build a bio with a size that exceeds 
> max_hw_sectors when submitting a bio directly to a block device, or in 
> other words, if no stacked block driver sits between the submitter and 
> the block device? Am I perhaps missing something?

Device performance wise, the benefits are certainly not obvious to me either.
But for very fast block devices, I think the CPU overhead of building many
smaller BIOs may be significant compared to splitting a large BIO into multiple
requests. Though it may be good to revisit this with some benchmark numbers.

> 
> Thanks,
> 
> Bart.
> 


-- 
Damien Le Moal
Western Digital Research

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
  2021-04-11 22:13                                   ` Damien Le Moal
@ 2021-04-12  3:23                                     ` Ming Lei
       [not found]                                       ` <CGME20210413045517epcas1p32b058646dd795e59389401ca997c4cac@epcas1p3.samsung.com>
  0 siblings, 1 reply; 20+ messages in thread
From: Ming Lei @ 2021-04-12  3:23 UTC (permalink / raw)
  To: Damien Le Moal
  Cc: Bart Van Assche, Changheun Lee, gregkh, Johannes Thumshirn,
	asml.silence, axboe, hch, jisoo2146.oh, junho89.kim, linux-block,
	linux-kernel, mj0123.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

On Sun, Apr 11, 2021 at 10:13:01PM +0000, Damien Le Moal wrote:
> On 2021/04/09 23:47, Bart Van Assche wrote:
> > On 4/7/21 3:27 AM, Damien Le Moal wrote:
> >> On 2021/04/07 18:46, Changheun Lee wrote:
> >>> I'll prepare new patch as you recommand. It will be added setting of
> >>> limit_bio_size automatically when queue max sectors is determined.
> >>
> >> Please do that in the driver for the HW that benefits from it. Do not do this
> >> for all block devices.
> > 
> > Hmm ... is it ever useful to build a bio with a size that exceeds 
> > max_hw_sectors when submitting a bio directly to a block device, or in 
> > other words, if no stacked block driver sits between the submitter and 
> > the block device? Am I perhaps missing something?
> 
> Device performance wise, the benefits are certainly not obvious to me either.
> But for very fast block devices, I think the CPU overhead of building more
> smaller BIOs may be significant compared to splitting a large BIO into multiple
> requests. Though it may be good to revisit this with some benchmark numbers.

This patch tries to address issue[1] in do_direct_IO(), in which
Changheun observed that other operations take time between adding pages
to the bio.

However, besides adding pages to the bio and submitting it, do_direct_IO()
only does the following:

- retrieves pages in batches (pinning 64 pages at a time from the VM), and

- retrieves the block mapping (get_more_blocks()), which is usually done
only a few times for 32MB; for a new mapping, clean_bdev_aliases() may
take a bit of time.

If there isn't system memory pressure, pinning 64 pages won't be slow, but
get_more_blocks() may take a bit of time.

Changheun, can you check whether get_more_blocks() is called multiple times
for submitting 32MB in your test?
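
For reference, that test case can be driven from userspace with a sketch
like the one below; the file path is a placeholder, it assumes a filesystem
that still goes through fs/direct-io.c (like the f2fs test mentioned below),
and counting get_more_blocks() calls still needs ftrace or similar on top:

/*
 * Minimal userspace sketch of the 32MB O_DIRECT read discussed above.
 * The file path is a placeholder and must live on a filesystem that
 * still uses fs/direct-io.c.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 32UL << 20;			/* 32 MiB */
	void *buf;
	int fd;

	if (posix_memalign(&buf, 4096, len))		/* O_DIRECT wants aligned buffers */
		return 1;
	fd = open("/mnt/test/bigfile", O_RDONLY | O_DIRECT);	/* placeholder path */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, buf, len) < 0)			/* one large read -> do_direct_IO() */
		perror("read");
	close(fd);
	free(buf);
	return 0;
}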

In my 32MB sync dio f2fs test on an x86_64 VM, one buffer_head mapping can
hold 32MB, but it is a freshly created f2fs.

I'd suggest understanding the issue completely before figuring out a
solution.


[1] https://lore.kernel.org/linux-block/20210202041204.28995-1-nanich.lee@samsung.com/


Thanks,
Ming


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
       [not found]                                       ` <CGME20210413045517epcas1p32b058646dd795e59389401ca997c4cac@epcas1p3.samsung.com>
@ 2021-04-13  4:37                                         ` Changheun Lee
  2021-04-13  5:32                                           ` Christoph Hellwig
  0 siblings, 1 reply; 20+ messages in thread
From: Changheun Lee @ 2021-04-13  4:37 UTC (permalink / raw)
  To: ming.lei
  Cc: Damien.LeMoal, Johannes.Thumshirn, asml.silence, axboe,
	bvanassche, gregkh, hch, jisoo2146.oh, junho89.kim, linux-block,
	linux-kernel, mj0123.lee, nanich.lee, osandov, patchwork-bot,
	seunghwan.hyun, sookwan7.kim, tj, tom.leiming, woosung2.lee,
	yt0928.kim

> On Sun, Apr 11, 2021 at 10:13:01PM +0000, Damien Le Moal wrote:
> > On 2021/04/09 23:47, Bart Van Assche wrote:
> > > On 4/7/21 3:27 AM, Damien Le Moal wrote:
> > >> On 2021/04/07 18:46, Changheun Lee wrote:
> > >>> I'll prepare new patch as you recommand. It will be added setting of
> > >>> limit_bio_size automatically when queue max sectors is determined.
> > >>
> > >> Please do that in the driver for the HW that benefits from it. Do not do this
> > >> for all block devices.
> > > 
> > > Hmm ... is it ever useful to build a bio with a size that exceeds 
> > > max_hw_sectors when submitting a bio directly to a block device, or in 
> > > other words, if no stacked block driver sits between the submitter and 
> > > the block device? Am I perhaps missing something?
> > 
> > Device performance wise, the benefits are certainly not obvious to me either.
> > But for very fast block devices, I think the CPU overhead of building more
> > smaller BIOs may be significant compared to splitting a large BIO into multiple
> > requests. Though it may be good to revisit this with some benchmark numbers.
> 
> This patch tries to address the issue[1] in do_direct_IO(), in which
> Changheun observed that other operations take time between adding pages
> to the bio.
> 
> However, aside from building and submitting the bio, do_direct_IO() only
> does the following:
> 
> - retrieves pages in batches (pinning 64 pages at a time from the VM), and
> 
> - retrieves the block mapping (get_more_blocks), which is usually done
> only a few times for 32MB; for a new mapping, clean_bdev_aliases() may
> take a bit of time.
> 
> If there is no system memory pressure, pinning 64 pages won't be slow, but
> get_more_blocks() may take a bit of time.
> 
> Changheun, can you check whether get_more_blocks() is called multiple times
> when submitting 32MB in your test?

It was called only about once in total.

> 
> In my 32MB sync dio f2fs test on an x86_64 VM, one buffer_head mapping can
> hold 32MB, but that is on a freshly created f2fs.
> 
> I'd suggest understanding the issue completely before settling on a
> solution.

Thank you for your advice. I'll analyze your point in more detail later. :)
But I think that is separate from finding the main time sink in
do_direct_IO(). I think the excessive looping should be controlled:
8,192 loop iterations in do_direct_IO() - for 32MB - to submit one bio is
too many on a 4KB page system. I want to apply an optional solution to
avoid the excessive looping caused by multipage bvec.
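
For reference, a tiny standalone C check of those counts, assuming 4KB pages
and the proposed 1MB bio cap:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4UL * 1024;           /* 4KB page              */
        unsigned long chunk     = 32UL * 1024 * 1024;   /* 32MB direct read      */
        unsigned long bio_limit = 1UL * 1024 * 1024;    /* proposed 1MB bio cap  */

        printf("pages merged into one unlimited bio: %lu\n", chunk / page_size);     /* 8,192 */
        printf("pages per 1MB-limited bio:           %lu\n", bio_limit / page_size); /* 256   */
        printf("bios needed for the 32MB chunk:      %lu\n", chunk / bio_limit);     /* 32    */
        return 0;
}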

Thanks,

Changheun Lee

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RESEND,v5,1/2] bio: limit bio max size
  2021-04-13  4:37                                         ` Changheun Lee
@ 2021-04-13  5:32                                           ` Christoph Hellwig
  0 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2021-04-13  5:32 UTC (permalink / raw)
  To: Changheun Lee
  Cc: ming.lei, Damien.LeMoal, Johannes.Thumshirn, asml.silence, axboe,
	bvanassche, gregkh, hch, jisoo2146.oh, junho89.kim, linux-block,
	linux-kernel, mj0123.lee, osandov, patchwork-bot, seunghwan.hyun,
	sookwan7.kim, tj, tom.leiming, woosung2.lee, yt0928.kim

And more importantly, please test with a file system that uses the
iomap direct I/O code (btrfs, gfs2, ext4, xfs, zonefs), as we should
never just work around, in the block layer, a legacy codebase that
should go away.
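
For what it's worth, a minimal userspace sketch of such a re-test is below,
doing one 32MB O_DIRECT read. The file path is only a placeholder, the
target file must already exist (at least 32MB in size) on an iomap-based
filesystem, and buffer/length alignment requirements depend on the device:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define CHUNK (32UL << 20)   /* 32MB chunk, matching the reported workload */

int main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "/mnt/xfs/testfile"; /* placeholder path */
        void *buf;
        int fd;
        ssize_t ret;

        if (posix_memalign(&buf, 4096, CHUNK)) {  /* O_DIRECT needs an aligned buffer */
                perror("posix_memalign");
                return 1;
        }
        fd = open(path, O_RDONLY | O_DIRECT);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        ret = read(fd, buf, CHUNK);               /* single 32MB direct read */
        if (ret < 0)
                perror("read");
        else
                printf("read %zd bytes with O_DIRECT from %s\n", ret, path);
        close(fd);
        free(buf);
        return 0;
}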

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-04-13  5:35 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20210316080104epcas1p11483035c927c2e65600ae90309334b24@epcas1p1.samsung.com>
2021-03-16  7:44 ` [RESEND PATCH v5 1/2] bio: limit bio max size Changheun Lee
     [not found]   ` <CGME20210316080106epcas1p3522dda95e9c97fc39b40b008bbf87c04@epcas1p3.samsung.com>
2021-03-16  7:44     ` [RESEND PATCH v5 2/2] bio: add limit_bio_size sysfs Changheun Lee
2021-04-06 21:05       ` Bart Van Assche
     [not found]         ` <CGME20210407013850epcas1p2d305f138c9fa1431abba1ec44a382de9@epcas1p2.samsung.com>
2021-04-07  1:21           ` Changheun Lee
2021-04-07  5:43             ` Greg KH
     [not found]   ` <CGME20210406014905epcas1p16830a46b7ac6af95a0e2c2c6f4c04859@epcas1p1.samsung.com>
2021-04-06  1:31     ` [RESEND PATCH v5 1/2] bio: limit bio max size Changheun Lee
2021-04-06  7:32       ` Greg KH
     [not found]         ` <CGME20210407003345epcas1p21376df37d3dfe933684139d3beaf9c51@epcas1p2.samsung.com>
2021-04-07  0:16           ` Changheun Lee
2021-04-07  5:05             ` Greg KH
     [not found]               ` <CGME20210407052407epcas1p1d0b5d23ac04a68760f691954e7ada3dd@epcas1p1.samsung.com>
2021-04-07  5:06                 ` Changheun Lee
2021-04-07  5:36                   ` Greg KH
     [not found]                     ` <CGME20210407071241epcas1p2559ea674299b686c9001326894c933bc@epcas1p2.samsung.com>
2021-04-07  6:55                       ` Changheun Lee
2021-04-07  7:40                         ` Greg KH
     [not found]                           ` <CGME20210407094610epcas1p472207e8d3ca0e5e697974c993a2a34f7@epcas1p4.samsung.com>
2021-04-07  9:28                             ` [RESEND,v5,1/2] " Changheun Lee
2021-04-07 10:27                               ` Damien Le Moal
2021-04-09 14:47                                 ` Bart Van Assche
2021-04-11 22:13                                   ` Damien Le Moal
2021-04-12  3:23                                     ` Ming Lei
     [not found]                                       ` <CGME20210413045517epcas1p32b058646dd795e59389401ca997c4cac@epcas1p3.samsung.com>
2021-04-13  4:37                                         ` Changheun Lee
2021-04-13  5:32                                           ` Christoph Hellwig
