* [PATCH 1/5] block: add single bio async direct IO helper
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
@ 2021-10-23 16:21 ` Pavel Begunkov
2021-10-23 16:21 ` [PATCH 2/5] block: refactor bio_iov_bvec_set() Pavel Begunkov
` (5 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:21 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig, Pavel Begunkov
As with __blkdev_direct_IO_simple(), we can implement direct IO more
efficiently if there is only one bio. Add __blkdev_direct_IO_async() and
blkdev_bio_end_io_async(). This patch brings me from 4.45-4.5 MIOPS with
nullblk to 4.7+.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
block/fops.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 84 insertions(+), 3 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index 396537598e3e..a7b328296912 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -305,6 +305,85 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
return ret;
}
+static void blkdev_bio_end_io_async(struct bio *bio)
+{
+ struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
+ struct kiocb *iocb = dio->iocb;
+ ssize_t ret;
+
+ if (likely(!bio->bi_status)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(bio->bi_status);
+ }
+
+ iocb->ki_complete(iocb, ret, 0);
+
+ if (dio->flags & DIO_SHOULD_DIRTY) {
+ bio_check_pages_dirty(bio);
+ } else {
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ }
+}
+
+static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
+ struct iov_iter *iter,
+ unsigned int nr_pages)
+{
+ struct block_device *bdev = iocb->ki_filp->private_data;
+ struct blkdev_dio *dio;
+ struct bio *bio;
+ loff_t pos = iocb->ki_pos;
+ int ret = 0;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+ dio = container_of(bio, struct blkdev_dio, bio);
+ dio->flags = 0;
+ dio->iocb = iocb;
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = iocb->ki_hint;
+ bio->bi_end_io = blkdev_bio_end_io_async;
+ bio->bi_ioprio = iocb->ki_ioprio;
+
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return ret;
+ }
+ dio->size = bio->bi_iter.bi_size;
+
+ if (iov_iter_rw(iter) == READ) {
+ bio->bi_opf = REQ_OP_READ;
+ if (iter_is_iovec(iter)) {
+ dio->flags |= DIO_SHOULD_DIRTY;
+ bio_set_pages_dirty(bio);
+ }
+ } else {
+ bio->bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(bio->bi_iter.bi_size);
+ }
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
+
+ if (iocb->ki_flags & IOCB_HIPRI) {
+ bio_set_polled(bio, iocb);
+ submit_bio(bio);
+ WRITE_ONCE(iocb->private, bio);
+ } else {
+ submit_bio(bio);
+ }
+ return -EIOCBQUEUED;
+}
+
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
unsigned int nr_pages;
@@ -313,9 +392,11 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return 0;
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
- return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
+ if (likely(nr_pages <= BIO_MAX_VECS)) {
+ if (is_sync_kiocb(iocb))
+ return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+ return __blkdev_direct_IO_async(iocb, iter, nr_pages);
+ }
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}
--
2.33.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 2/5] block: refactor bio_iov_bvec_set()
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
2021-10-23 16:21 ` [PATCH 1/5] block: add single bio async direct IO helper Pavel Begunkov
@ 2021-10-23 16:21 ` Pavel Begunkov
2021-10-25 7:31 ` Christoph Hellwig
2021-10-23 16:21 ` [PATCH 3/5] block: avoid extra iter advance with async iocb Pavel Begunkov
` (4 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:21 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig, Pavel Begunkov
Combine bio_iov_bvec_set() and bio_iov_bvec_set_append() and let the
caller to do iov_iter_advance(). Also get rid of __bio_iov_bvec_set(),
which was duplicated in the final binary, and replace a weird
iov_iter_truncate() of a temporal iter copy with min() better reflecting
the intention.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
block/bio.c | 37 ++++++++++++++-----------------------
1 file changed, 14 insertions(+), 23 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 46a87c72d2b4..ead1f8a9ff5e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1046,36 +1046,27 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
-static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+static void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
+ size_t size = iov_iter_count(iter);
+
WARN_ON_ONCE(bio->bi_max_vecs);
+ if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ size_t max_sectors = queue_max_zone_append_sectors(q);
+
+ size = min(size, max_sectors << SECTOR_SHIFT);
+ }
+
bio->bi_vcnt = iter->nr_segs;
bio->bi_io_vec = (struct bio_vec *)iter->bvec;
bio->bi_iter.bi_bvec_done = iter->iov_offset;
- bio->bi_iter.bi_size = iter->count;
+ bio->bi_iter.bi_size = size;
bio_set_flag(bio, BIO_NO_PAGE_REF);
bio_set_flag(bio, BIO_CLONED);
}
-static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
-{
- __bio_iov_bvec_set(bio, iter);
- iov_iter_advance(iter, iter->count);
- return 0;
-}
-
-static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
-{
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- struct iov_iter i = *iter;
-
- iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
- __bio_iov_bvec_set(bio, &i);
- iov_iter_advance(iter, i.count);
- return 0;
-}
-
static void bio_put_pages(struct page **pages, size_t size, size_t off)
{
size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE);
@@ -1217,9 +1208,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
int ret = 0;
if (iov_iter_is_bvec(iter)) {
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- return bio_iov_bvec_set_append(bio, iter);
- return bio_iov_bvec_set(bio, iter);
+ bio_iov_bvec_set(bio, iter);
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ return 0;
}
do {
--
2.33.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 2/5] block: refactor bio_iov_bvec_set()
2021-10-23 16:21 ` [PATCH 2/5] block: refactor bio_iov_bvec_set() Pavel Begunkov
@ 2021-10-25 7:31 ` Christoph Hellwig
0 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-25 7:31 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Jens Axboe, Christoph Hellwig
On Sat, Oct 23, 2021 at 05:21:33PM +0100, Pavel Begunkov wrote:
> Combine bio_iov_bvec_set() and bio_iov_bvec_set_append() and let the
> caller to do iov_iter_advance(). Also get rid of __bio_iov_bvec_set(),
> which was duplicated in the final binary, and replace a weird
> iov_iter_truncate() of a temporal iter copy with min() better reflecting
> the intention.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 3/5] block: avoid extra iter advance with async iocb
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
2021-10-23 16:21 ` [PATCH 1/5] block: add single bio async direct IO helper Pavel Begunkov
2021-10-23 16:21 ` [PATCH 2/5] block: refactor bio_iov_bvec_set() Pavel Begunkov
@ 2021-10-23 16:21 ` Pavel Begunkov
2021-10-25 7:33 ` Christoph Hellwig
2021-10-23 16:21 ` [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO() Pavel Begunkov
` (3 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:21 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig, Pavel Begunkov
Nobody cares about iov iterators state if we return -EIOCBQUEUED, so as
the we now have __blkdev_direct_IO_async(), which gets pages only once,
we can skip expensive iov_iter_advance(). It's around 1-2% of all CPU
spent.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
block/bio.c | 2 +-
block/fops.c | 20 +++++++++++++++-----
include/linux/bio.h | 1 +
3 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index ead1f8a9ff5e..15ab0d6d1c06 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1046,7 +1046,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
}
EXPORT_SYMBOL_GPL(__bio_release_pages);
-static void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
{
size_t size = iov_iter_count(iter);
diff --git a/block/fops.c b/block/fops.c
index a7b328296912..8800b0ad5c29 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -352,11 +352,21 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio;
- ret = bio_iov_iter_get_pages(bio, iter);
- if (unlikely(ret)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- return ret;
+ if (!iov_iter_is_bvec(iter)) {
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return ret;
+ }
+ } else {
+ /*
+ * Users don't rely on the iterator being in any particular
+ * state for async I/O returning -EIOCBQUEUED, hence we can
+ * avoid expensive iov_iter_advance(). Bypass
+ * bio_iov_iter_get_pages() and set the bvec directly.
+ */
+ bio_iov_bvec_set(bio, iter);
}
dio->size = bio->bi_iter.bi_size;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c88700d1bdc3..fe6bdfbbef66 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -417,6 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
void __bio_release_pages(struct bio *bio, bool mark_dirty);
extern void bio_set_pages_dirty(struct bio *bio);
extern void bio_check_pages_dirty(struct bio *bio);
--
2.33.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 3/5] block: avoid extra iter advance with async iocb
2021-10-23 16:21 ` [PATCH 3/5] block: avoid extra iter advance with async iocb Pavel Begunkov
@ 2021-10-25 7:33 ` Christoph Hellwig
2021-10-25 11:10 ` Pavel Begunkov
0 siblings, 1 reply; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-25 7:33 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Jens Axboe, Christoph Hellwig
On Sat, Oct 23, 2021 at 05:21:34PM +0100, Pavel Begunkov wrote:
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -352,11 +352,21 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
> bio->bi_end_io = blkdev_bio_end_io_async;
> bio->bi_ioprio = iocb->ki_ioprio;
>
> - ret = bio_iov_iter_get_pages(bio, iter);
> - if (unlikely(ret)) {
> - bio->bi_status = BLK_STS_IOERR;
> - bio_endio(bio);
> - return ret;
> + if (!iov_iter_is_bvec(iter)) {
> + ret = bio_iov_iter_get_pages(bio, iter);
> + if (unlikely(ret)) {
> + bio->bi_status = BLK_STS_IOERR;
> + bio_endio(bio);
> + return ret;
> + }
Nit: I generally find it much nicer to read if simple if statements
don't use pointless negations.
> + } else {
> + /*
> + * Users don't rely on the iterator being in any particular
> + * state for async I/O returning -EIOCBQUEUED, hence we can
> + * avoid expensive iov_iter_advance(). Bypass
> + * bio_iov_iter_get_pages() and set the bvec directly.
> + */
> + bio_iov_bvec_set(bio, iter);
So if this optimization is so useful, please also do it for
non-bvec iov_iters, which is what 99% of the applications actually
use.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/5] block: avoid extra iter advance with async iocb
2021-10-25 7:33 ` Christoph Hellwig
@ 2021-10-25 11:10 ` Pavel Begunkov
0 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-25 11:10 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-block, Jens Axboe
On 10/25/21 08:33, Christoph Hellwig wrote:
> On Sat, Oct 23, 2021 at 05:21:34PM +0100, Pavel Begunkov wrote:
>> --- a/block/fops.c
>> +++ b/block/fops.c
>> @@ -352,11 +352,21 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>> bio->bi_end_io = blkdev_bio_end_io_async;
>> bio->bi_ioprio = iocb->ki_ioprio;
>>
>> - ret = bio_iov_iter_get_pages(bio, iter);
>> - if (unlikely(ret)) {
>> - bio->bi_status = BLK_STS_IOERR;
>> - bio_endio(bio);
>> - return ret;
>> + if (!iov_iter_is_bvec(iter)) {
>> + ret = bio_iov_iter_get_pages(bio, iter);
>> + if (unlikely(ret)) {
>> + bio->bi_status = BLK_STS_IOERR;
>> + bio_endio(bio);
>> + return ret;
>> + }
>
> Nit: I generally find it much nicer to read if simple if statements
> don't use pointless negations.
>
>> + } else {
>> + /*
>> + * Users don't rely on the iterator being in any particular
>> + * state for async I/O returning -EIOCBQUEUED, hence we can
>> + * avoid expensive iov_iter_advance(). Bypass
>> + * bio_iov_iter_get_pages() and set the bvec directly.
>> + */
>> + bio_iov_bvec_set(bio, iter);
>
> So if this optimization is so useful, please also do it for
> non-bvec iov_iters, which is what 99% of the applications actually
> use.
It's an async path, so mainly io_uring or aio, I don't think there
is much profit in doing that for iov, especially behind iov -> bvec
translation with page referencing.
Could've been done nonetheless, but what I think about looks too
ugly because of the loop inside of bio_iov_iter_get_pages(). Don't
think the sketch below is viable, any better ideas?
ssize_t __bio_iov_iter_get_pages() {
...
/* not advancing */
return size;
}
do {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
ret = __bio_iov_append_get_pages(bio, iter);
else
ret = __bio_iov_iter_get_pages(bio, iter);
if (ret < 0)
break;
iov_iter_advance(ret);
} while (iov_iter_count(iter) && !bio_full(bio, 0));
and copy paste into fops.c
do {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
ret = __bio_iov_append_get_pages(bio, iter);
else
ret = __bio_iov_iter_get_pages(bio, iter);
if (ret < 0 || iov_iter_count(iter) == ret || bio_full(bio, 0))
break;
iov_iter_advance(ret);
} while (1);
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
` (2 preceding siblings ...)
2021-10-23 16:21 ` [PATCH 3/5] block: avoid extra iter advance with async iocb Pavel Begunkov
@ 2021-10-23 16:21 ` Pavel Begunkov
2021-10-23 16:46 ` Pavel Begunkov
2021-10-25 7:35 ` Christoph Hellwig
2021-10-23 16:21 ` [PATCH 5/5] block: add async version of bio_set_polled Pavel Begunkov
` (2 subsequent siblings)
6 siblings, 2 replies; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:21 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig, Pavel Begunkov
With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
serves only multio-bio I/O, which we don't poll. Now we can remove
anything related to I/O polling from it.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
block/fops.c | 20 +++-----------------
1 file changed, 3 insertions(+), 17 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index 8800b0ad5c29..997904963a9d 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
- bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
int ret = 0;
@@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (is_read && iter_is_iovec(iter))
dio->flags |= DIO_SHOULD_DIRTY;
- /*
- * Don't plug for HIPRI/polled IO, as those should go straight
- * to issue
- */
- if (!(iocb->ki_flags & IOCB_HIPRI))
- blk_start_plug(&plug);
+ blk_start_plug(&plug);
for (;;) {
bio_set_dev(bio, bdev);
@@ -254,11 +248,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
if (!nr_pages) {
- if (do_poll)
- bio_set_polled(bio, iocb);
submit_bio(bio);
- if (do_poll)
- WRITE_ONCE(iocb->private, bio);
break;
}
if (!(dio->flags & DIO_MULTI_BIO)) {
@@ -271,7 +261,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio_get(bio);
dio->flags |= DIO_MULTI_BIO;
atomic_set(&dio->ref, 2);
- do_poll = false;
} else {
atomic_inc(&dio->ref);
}
@@ -280,8 +269,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
- if (!(iocb->ki_flags & IOCB_HIPRI))
- blk_finish_plug(&plug);
+ blk_finish_plug(&plug);
if (!is_sync)
return -EIOCBQUEUED;
@@ -290,9 +278,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
set_current_state(TASK_UNINTERRUPTIBLE);
if (!READ_ONCE(dio->waiter))
break;
-
- if (!do_poll || !bio_poll(bio, NULL, 0))
- blk_io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
--
2.33.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-23 16:21 ` [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO() Pavel Begunkov
@ 2021-10-23 16:46 ` Pavel Begunkov
2021-10-24 15:09 ` Jens Axboe
2021-10-25 7:35 ` Christoph Hellwig
1 sibling, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:46 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig
On 10/23/21 17:21, Pavel Begunkov wrote:
> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
> serves only multio-bio I/O, which we don't poll. Now we can remove
> anything related to I/O polling from it.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
> block/fops.c | 20 +++-----------------
> 1 file changed, 3 insertions(+), 17 deletions(-)
>
> diff --git a/block/fops.c b/block/fops.c
> index 8800b0ad5c29..997904963a9d 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
> struct blk_plug plug;
> struct blkdev_dio *dio;
> struct bio *bio;
> - bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
> bool is_read = (iov_iter_rw(iter) == READ), is_sync;
> loff_t pos = iocb->ki_pos;
> int ret = 0;
> @@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
> if (is_read && iter_is_iovec(iter))
> dio->flags |= DIO_SHOULD_DIRTY;
>
> - /*
> - * Don't plug for HIPRI/polled IO, as those should go straight
> - * to issue
> - */
> - if (!(iocb->ki_flags & IOCB_HIPRI))
> - blk_start_plug(&plug);
I'm not sure, do we want to leave it conditional here?
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-23 16:46 ` Pavel Begunkov
@ 2021-10-24 15:09 ` Jens Axboe
2021-10-24 17:17 ` Pavel Begunkov
0 siblings, 1 reply; 21+ messages in thread
From: Jens Axboe @ 2021-10-24 15:09 UTC (permalink / raw)
To: Pavel Begunkov, linux-block; +Cc: Christoph Hellwig
On 10/23/21 10:46 AM, Pavel Begunkov wrote:
> On 10/23/21 17:21, Pavel Begunkov wrote:
>> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
>> serves only multio-bio I/O, which we don't poll. Now we can remove
>> anything related to I/O polling from it.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> ---
>> block/fops.c | 20 +++-----------------
>> 1 file changed, 3 insertions(+), 17 deletions(-)
>>
>> diff --git a/block/fops.c b/block/fops.c
>> index 8800b0ad5c29..997904963a9d 100644
>> --- a/block/fops.c
>> +++ b/block/fops.c
>> @@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>> struct blk_plug plug;
>> struct blkdev_dio *dio;
>> struct bio *bio;
>> - bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
>> bool is_read = (iov_iter_rw(iter) == READ), is_sync;
>> loff_t pos = iocb->ki_pos;
>> int ret = 0;
>> @@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>> if (is_read && iter_is_iovec(iter))
>> dio->flags |= DIO_SHOULD_DIRTY;
>>
>> - /*
>> - * Don't plug for HIPRI/polled IO, as those should go straight
>> - * to issue
>> - */
>> - if (!(iocb->ki_flags & IOCB_HIPRI))
>> - blk_start_plug(&plug);
>
> I'm not sure, do we want to leave it conditional here?
For async polled there's only one user and that user plug already...
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-24 15:09 ` Jens Axboe
@ 2021-10-24 17:17 ` Pavel Begunkov
2021-10-24 18:11 ` Jens Axboe
0 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-24 17:17 UTC (permalink / raw)
To: Jens Axboe, linux-block; +Cc: Christoph Hellwig
On 10/24/21 16:09, Jens Axboe wrote:
> On 10/23/21 10:46 AM, Pavel Begunkov wrote:
>> On 10/23/21 17:21, Pavel Begunkov wrote:
>>> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
>>> serves only multio-bio I/O, which we don't poll. Now we can remove
>>> anything related to I/O polling from it.
>>>
>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>> ---
>>> block/fops.c | 20 +++-----------------
>>> 1 file changed, 3 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/block/fops.c b/block/fops.c
>>> index 8800b0ad5c29..997904963a9d 100644
>>> --- a/block/fops.c
>>> +++ b/block/fops.c
>>> @@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>>> struct blk_plug plug;
>>> struct blkdev_dio *dio;
>>> struct bio *bio;
>>> - bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
>>> bool is_read = (iov_iter_rw(iter) == READ), is_sync;
>>> loff_t pos = iocb->ki_pos;
>>> int ret = 0;
>>> @@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>>> if (is_read && iter_is_iovec(iter))
>>> dio->flags |= DIO_SHOULD_DIRTY;
>>>
>>> - /*
>>> - * Don't plug for HIPRI/polled IO, as those should go straight
>>> - * to issue
>>> - */
>>> - if (!(iocb->ki_flags & IOCB_HIPRI))
>>> - blk_start_plug(&plug);
>>
>> I'm not sure, do we want to leave it conditional here?
>
> For async polled there's only one user and that user plug already...
It's __blkdev_direct_IO() though, covers both sync and async
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-24 17:17 ` Pavel Begunkov
@ 2021-10-24 18:11 ` Jens Axboe
0 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2021-10-24 18:11 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Christoph Hellwig
On Oct 24, 2021, at 12:18 PM, Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> On 10/24/21 16:09, Jens Axboe wrote:
>>> On 10/23/21 10:46 AM, Pavel Begunkov wrote:
>>> On 10/23/21 17:21, Pavel Begunkov wrote:
>>>> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
>>>> serves only multio-bio I/O, which we don't poll. Now we can remove
>>>> anything related to I/O polling from it.
>>>>
>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>> ---
>>>> block/fops.c | 20 +++-----------------
>>>> 1 file changed, 3 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/block/fops.c b/block/fops.c
>>>> index 8800b0ad5c29..997904963a9d 100644
>>>> --- a/block/fops.c
>>>> +++ b/block/fops.c
>>>> @@ -190,7 +190,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>>>> struct blk_plug plug;
>>>> struct blkdev_dio *dio;
>>>> struct bio *bio;
>>>> - bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
>>>> bool is_read = (iov_iter_rw(iter) == READ), is_sync;
>>>> loff_t pos = iocb->ki_pos;
>>>> int ret = 0;
>>>> @@ -216,12 +215,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>>>> if (is_read && iter_is_iovec(iter))
>>>> dio->flags |= DIO_SHOULD_DIRTY;
>>>> - /*
>>>> - * Don't plug for HIPRI/polled IO, as those should go straight
>>>> - * to issue
>>>> - */
>>>> - if (!(iocb->ki_flags & IOCB_HIPRI))
>>>> - blk_start_plug(&plug);
>>>
>>> I'm not sure, do we want to leave it conditional here?
>> For async polled there's only one user and that user plug already...
>
> It's __blkdev_direct_IO() though, covers both sync and async
Pointless to plug for sync, though.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-23 16:21 ` [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO() Pavel Begunkov
2021-10-23 16:46 ` Pavel Begunkov
@ 2021-10-25 7:35 ` Christoph Hellwig
2021-10-25 10:12 ` Pavel Begunkov
1 sibling, 1 reply; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-25 7:35 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Jens Axboe, Christoph Hellwig
On Sat, Oct 23, 2021 at 05:21:35PM +0100, Pavel Begunkov wrote:
> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
> serves only multio-bio I/O, which we don't poll. Now we can remove
> anything related to I/O polling from it.
Looks good, but please also remove the entire non-multi bio support
while you're at it.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-25 7:35 ` Christoph Hellwig
@ 2021-10-25 10:12 ` Pavel Begunkov
2021-10-25 10:27 ` Pavel Begunkov
0 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-25 10:12 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-block, Jens Axboe
On 10/25/21 08:35, Christoph Hellwig wrote:
> On Sat, Oct 23, 2021 at 05:21:35PM +0100, Pavel Begunkov wrote:
>> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
>> serves only multio-bio I/O, which we don't poll. Now we can remove
>> anything related to I/O polling from it.
>
> Looks good, but please also remove the entire non-multi bio support
> while you're at it.
ok, will include into the series
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-25 10:12 ` Pavel Begunkov
@ 2021-10-25 10:27 ` Pavel Begunkov
2021-10-27 6:47 ` Christoph Hellwig
0 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-25 10:27 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-block, Jens Axboe
On 10/25/21 11:12, Pavel Begunkov wrote:
> On 10/25/21 08:35, Christoph Hellwig wrote:
>> On Sat, Oct 23, 2021 at 05:21:35PM +0100, Pavel Begunkov wrote:
>>> With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
>>> serves only multio-bio I/O, which we don't poll. Now we can remove
>>> anything related to I/O polling from it.
>>
>> Looks good, but please also remove the entire non-multi bio support
>> while you're at it.
>
> ok, will include into the series
You mean simplifying __blkdev_direct_IO(), right? E.g. as mentioned
removing DIO_MULTI_BIO.
There is also some potential for doing bio_iov_vecs_to_alloc()
only once and doing some refactoring on top of it, but would
need extra legwork that is better to go separately.
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO()
2021-10-25 10:27 ` Pavel Begunkov
@ 2021-10-27 6:47 ` Christoph Hellwig
0 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-27 6:47 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: Christoph Hellwig, linux-block, Jens Axboe
On Mon, Oct 25, 2021 at 11:27:12AM +0100, Pavel Begunkov wrote:
> On 10/25/21 11:12, Pavel Begunkov wrote:
> > On 10/25/21 08:35, Christoph Hellwig wrote:
> > > On Sat, Oct 23, 2021 at 05:21:35PM +0100, Pavel Begunkov wrote:
> > > > With addition of __blkdev_direct_IO_async(), __blkdev_direct_IO() now
> > > > serves only multio-bio I/O, which we don't poll. Now we can remove
> > > > anything related to I/O polling from it.
> > >
> > > Looks good, but please also remove the entire non-multi bio support
> > > while you're at it.
> >
> > ok, will include into the series
>
> You mean simplifying __blkdev_direct_IO(), right? E.g. as mentioned
> removing DIO_MULTI_BIO.
Yes.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 5/5] block: add async version of bio_set_polled
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
` (3 preceding siblings ...)
2021-10-23 16:21 ` [PATCH 4/5] block: kill unused polling bits in __blkdev_direct_IO() Pavel Begunkov
@ 2021-10-23 16:21 ` Pavel Begunkov
2021-10-25 7:36 ` Christoph Hellwig
2021-10-25 7:28 ` [PATCH 0/5] block optimisations Christoph Hellwig
2021-10-26 23:30 ` (subset) " Jens Axboe
6 siblings, 1 reply; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-23 16:21 UTC (permalink / raw)
To: linux-block; +Cc: Jens Axboe, Christoph Hellwig, Pavel Begunkov
If we know that a iocb is async we can optimise bio_set_polled() a bit,
add a new helper bio_set_polled_async().
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
block/fops.c | 7 +++----
include/linux/bio.h | 5 +++++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index 997904963a9d..9cb436de92bb 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -367,14 +367,13 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
task_io_account_write(bio->bi_iter.bi_size);
}
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio->bi_opf |= REQ_NOWAIT;
-
if (iocb->ki_flags & IOCB_HIPRI) {
- bio_set_polled(bio, iocb);
+ bio_set_polled_async(bio, iocb);
submit_bio(bio);
WRITE_ONCE(iocb->private, bio);
} else {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
submit_bio(bio);
}
return -EIOCBQUEUED;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index fe6bdfbbef66..b64161473f3e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -738,6 +738,11 @@ static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
bio->bi_opf |= REQ_NOWAIT;
}
+static inline void bio_set_polled_async(struct bio *bio, struct kiocb *kiocb)
+{
+ bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+}
+
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
#endif /* __LINUX_BIO_H */
--
2.33.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 5/5] block: add async version of bio_set_polled
2021-10-23 16:21 ` [PATCH 5/5] block: add async version of bio_set_polled Pavel Begunkov
@ 2021-10-25 7:36 ` Christoph Hellwig
2021-10-25 10:20 ` Pavel Begunkov
0 siblings, 1 reply; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-25 7:36 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Jens Axboe, Christoph Hellwig
On Sat, Oct 23, 2021 at 05:21:36PM +0100, Pavel Begunkov wrote:
> +static inline void bio_set_polled_async(struct bio *bio, struct kiocb *kiocb)
> +{
> + bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
> +}
As mentioned last time I'm a little skeptical of this optimization. But
if you an Jens thing it is worth it just drop this helper and assign
the flags directly.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 5/5] block: add async version of bio_set_polled
2021-10-25 7:36 ` Christoph Hellwig
@ 2021-10-25 10:20 ` Pavel Begunkov
0 siblings, 0 replies; 21+ messages in thread
From: Pavel Begunkov @ 2021-10-25 10:20 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-block, Jens Axboe
On 10/25/21 08:36, Christoph Hellwig wrote:
> On Sat, Oct 23, 2021 at 05:21:36PM +0100, Pavel Begunkov wrote:
>> +static inline void bio_set_polled_async(struct bio *bio, struct kiocb *kiocb)
>> +{
>> + bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
>> +}
>
> As mentioned last time I'm a little skeptical of this optimization. But
> if you an Jens thing it is worth it just drop this helper and assign
> the flags directly.
Ok. I'll keep it last in the series, looks Jens doesn't mind
applying patchsets partially.
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/5] block optimisations
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
` (4 preceding siblings ...)
2021-10-23 16:21 ` [PATCH 5/5] block: add async version of bio_set_polled Pavel Begunkov
@ 2021-10-25 7:28 ` Christoph Hellwig
2021-10-26 23:30 ` (subset) " Jens Axboe
6 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2021-10-25 7:28 UTC (permalink / raw)
To: Pavel Begunkov; +Cc: linux-block, Jens Axboe, Christoph Hellwig
On Sat, Oct 23, 2021 at 05:21:31PM +0100, Pavel Begunkov wrote:
> I don't find a good way how to deduplicate __blkdev_direct_IO_async(),
> there are small differences in implementation. If that's fine I'd
> suggest to do it afterwards, anyway I want to brush up
> __blkdev_direct_IO(), e.g. remove effectively unnecessary DIO_MULTI_BIO
> flag.
If we add another code path we should at last drop the DIO_MULTI_BIO
code to keep the complexity down in that function.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: (subset) [PATCH 0/5] block optimisations
2021-10-23 16:21 [PATCH 0/5] block optimisations Pavel Begunkov
` (5 preceding siblings ...)
2021-10-25 7:28 ` [PATCH 0/5] block optimisations Christoph Hellwig
@ 2021-10-26 23:30 ` Jens Axboe
6 siblings, 0 replies; 21+ messages in thread
From: Jens Axboe @ 2021-10-26 23:30 UTC (permalink / raw)
To: Pavel Begunkov, linux-block; +Cc: Christoph Hellwig
On Sat, 23 Oct 2021 17:21:31 +0100, Pavel Begunkov wrote:
> Another split off from the "block optimisation round" series focusing
> on fops.c. It addresses Cristoph's review, the main change is
> setting bvec directly instead of passing a "hint"
> to bio_iov_iter_get_pages() in 3/5, and a prep patch 2/5.
>
> I don't find a good way how to deduplicate __blkdev_direct_IO_async(),
> there are small differences in implementation. If that's fine I'd
> suggest to do it afterwards, anyway I want to brush up
> __blkdev_direct_IO(), e.g. remove effectively unnecessary DIO_MULTI_BIO
> flag.
>
> [...]
Applied, thanks!
[1/5] block: add single bio async direct IO helper
(no commit info)
[2/5] block: refactor bio_iov_bvec_set()
(no commit info)
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 21+ messages in thread