linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] block: advance by bvec's length for bio_for_each_bvec
@ 2019-02-28  3:24 Ming Lei
  2019-02-28 13:00 ` Jens Axboe
  2019-02-28 13:58 ` Christoph Hellwig
  0 siblings, 2 replies; 5+ messages in thread
From: Ming Lei @ 2019-02-28  3:24 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, Ming Lei, Omar Sandoval, Christoph Hellwig

bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
and what we want to do is to iterate over multi-page bvecs, instead of pages.
However, bvec_iter_advance() is unaware of this requirement, and
always advances by page size.

This isn't efficient for the multi-page bvec iterator; also, bvec_iter_len()
isn't as fast as mp_bvec_iter_len().

So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().

More than 1% IOPS improvement can be observed in io_uring test on null_blk.

Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 include/linux/bio.h  | 13 +++++++++----
 include/linux/bvec.h | 13 ++++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/bio.h b/include/linux/bio.h
index bb6090aa165d..29c7dd348dc2 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -134,17 +134,22 @@ static inline bool bio_full(struct bio *bio)
 	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
 		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
 
-static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
-				    unsigned bytes)
+static inline void __bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				      unsigned bytes, bool bvec)
 {
 	iter->bi_sector += bytes >> 9;
 
 	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
 	else
-		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+		__bvec_iter_advance(bio->bi_io_vec, iter, bytes, bvec);
 		/* TODO: It is reasonable to complete bio with error here. */
 }
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
+{
+	return __bio_advance_iter(bio, iter, bytes, false);
+}
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
@@ -159,7 +164,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
 		((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \
-	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
+	     __bio_advance_iter((bio), &(iter), (bvl).bv_len, true))
 
 /* iterate over multi-page bvec */
 #define bio_for_each_bvec(bvl, bio, iter)			\
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 2c32e3e151a0..98a140fa4dac 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -102,8 +102,8 @@ static inline struct page *bvec_nth_page(struct page *page, int idx)
 	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
 })
 
-static inline bool bvec_iter_advance(const struct bio_vec *bv,
-		struct bvec_iter *iter, unsigned bytes)
+static inline bool __bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes, bool bvec)
 {
 	if (WARN_ONCE(bytes > iter->bi_size,
 		     "Attempted to advance past end of bvec iter\n")) {
@@ -112,7 +112,8 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
+		unsigned iter_len = bvec ? mp_bvec_iter_len(bv, *iter) :
+			bvec_iter_len(bv, *iter);
 		unsigned len = min(bytes, iter_len);
 
 		bytes -= len;
@@ -127,6 +128,12 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
+static inline bool bvec_iter_advance(const struct bio_vec *bv,
+		struct bvec_iter *iter, unsigned bytes)
+{
+	return __bvec_iter_advance(bv, iter, bytes, false);
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
-- 
2.9.5


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] block: advance by bvec's length for bio_for_each_bvec
  2019-02-28  3:24 [PATCH] block: advance by bvec's length for bio_for_each_bvec Ming Lei
@ 2019-02-28 13:00 ` Jens Axboe
  2019-02-28 13:58 ` Christoph Hellwig
  1 sibling, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2019-02-28 13:00 UTC (permalink / raw)
  To: Ming Lei; +Cc: linux-block, Omar Sandoval, Christoph Hellwig

On 2/27/19 8:24 PM, Ming Lei wrote:
> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> and what we want to do is to iterate over multi-page bvecs, instead of pages.
> However, bvec_iter_advance() is unaware of this requirement, and
> always advances by page size.
> 
> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> isn't as fast as mp_bvec_iter_len().
> 
> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> 
> More than 1% IOPS improvement can be observed in io_uring test on null_blk.

Thanks Ming, I tested this last night with good results.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] block: advance by bvec's length for bio_for_each_bvec
  2019-02-28  3:24 [PATCH] block: advance by bvec's length for bio_for_each_bvec Ming Lei
  2019-02-28 13:00 ` Jens Axboe
@ 2019-02-28 13:58 ` Christoph Hellwig
  2019-02-28 15:20   ` Ming Lei
  2019-02-28 15:23   ` Jens Axboe
  1 sibling, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2019-02-28 13:58 UTC (permalink / raw)
  To: Ming Lei; +Cc: Jens Axboe, linux-block, Omar Sandoval, Christoph Hellwig

On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> and what we want to do is to iterate over multi-page bvecs, instead of pages.
> However, bvec_iter_advance() is unaware of this requirement, and
> always advances by page size.
> 
> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> isn't as fast as mp_bvec_iter_len().
> 
> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> 
> More than 1% IOPS improvement can be observed in io_uring test on null_blk.

We've been there before, and I still insist that there is no good
reason ever to clamp the iteration to page size in bvec_iter_advance.
Callers that iterate page by page already do that clamping themselves.

So here is a resurrection and rebase of my patch from back then to
just do the right thing:

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 2c32e3e151a0..cf06c0647c4f 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
-		unsigned len = min(bytes, iter_len);
+		const struct bio_vec *cur = bv + iter->bi_idx;
+		unsigned len = min3(bytes, iter->bi_size,
+				    cur->bv_len - iter->bi_bvec_done);
 
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
 
-		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+		if (iter->bi_bvec_done == cur->bv_len) {
 			iter->bi_bvec_done = 0;
 			iter->bi_idx++;
 		}

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] block: advance by bvec's length for bio_for_each_bvec
  2019-02-28 13:58 ` Christoph Hellwig
@ 2019-02-28 15:20   ` Ming Lei
  2019-02-28 15:23   ` Jens Axboe
  1 sibling, 0 replies; 5+ messages in thread
From: Ming Lei @ 2019-02-28 15:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, linux-block, Omar Sandoval, Christoph Hellwig

On Thu, Feb 28, 2019 at 05:58:32AM -0800, Christoph Hellwig wrote:
> On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
> > bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
> > and what we want to do is to iterate over multi-page bvecs, instead of pages.
> > However, bvec_iter_advance() is unaware of this requirement, and
> > always advances by page size.
> > 
> > This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
> > isn't as fast as mp_bvec_iter_len().
> > 
> > So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
> > 
> > More than 1% IOPS improvement can be observed in io_uring test on null_blk.
> 
> We've been there before, and I still insist that there is no good
> reason ever to clamp the iteration to page size in bvec_iter_advance.
> Callers that iterate page by page already do that clamping themselves.
> 
> So here is a resurrection and rebase of my patch from back then to
> just do the right thing:
> 
> diff --git a/include/linux/bvec.h b/include/linux/bvec.h
> index 2c32e3e151a0..cf06c0647c4f 100644
> --- a/include/linux/bvec.h
> +++ b/include/linux/bvec.h
> @@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
>  	}
>  
>  	while (bytes) {
> -		unsigned iter_len = bvec_iter_len(bv, *iter);
> -		unsigned len = min(bytes, iter_len);
> +		const struct bio_vec *cur = bv + iter->bi_idx;
> +		unsigned len = min3(bytes, iter->bi_size,
> +				    cur->bv_len - iter->bi_bvec_done);
>  
>  		bytes -= len;
>  		iter->bi_size -= len;
>  		iter->bi_bvec_done += len;
>  
> -		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
> +		if (iter->bi_bvec_done == cur->bv_len) {
>  			iter->bi_bvec_done = 0;
>  			iter->bi_idx++;
>  		}

Yeah, this change is the correct thing to do, and there shouldn't be
a performance drop with this patch for Jens' test case, I guess.

Thanks,
Ming

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] block: advance by bvec's length for bio_for_each_bvec
  2019-02-28 13:58 ` Christoph Hellwig
  2019-02-28 15:20   ` Ming Lei
@ 2019-02-28 15:23   ` Jens Axboe
  1 sibling, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2019-02-28 15:23 UTC (permalink / raw)
  To: Christoph Hellwig, Ming Lei; +Cc: linux-block, Omar Sandoval, Christoph Hellwig

On 2/28/19 6:58 AM, Christoph Hellwig wrote:
> On Thu, Feb 28, 2019 at 11:24:21AM +0800, Ming Lei wrote:
>> bio_for_each_bvec is used in fast path of bio splitting and sg mapping,
>> and what we want to do is to iterate over multi-page bvecs, instead of pages.
>> However, bvec_iter_advance() is unaware of this requirement, and
>> always advances by page size.
>>
>> This way isn't efficient for multipage bvec iterator, also bvec_iter_len()
>> isn't as fast as mp_bvec_iter_len().
>>
>> So advance by multi-page bvec's length instead of page size for bio_for_each_bvec().
>>
>> More than 1% IOPS improvement can be observed in io_uring test on null_blk.
> 
> We've been there before, and I still insist that there is no good
> reason ever to clamp the iteration to page size in bvec_iter_advance.
> Callers that iterate page by page already do that clamping themselves.
> 
> So here is a resurrection and rebase of my patch from back then to
> just do the right thing:

Care to resend as a proper patch?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-02-28 15:23 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-28  3:24 [PATCH] block: advance by bvec's length for bio_for_each_bvec Ming Lei
2019-02-28 13:00 ` Jens Axboe
2019-02-28 13:58 ` Christoph Hellwig
2019-02-28 15:20   ` Ming Lei
2019-02-28 15:23   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).