* [PATCH 1/9] Use RWF_* flags for AIO operations
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
[not found] ` <20170411142619.27205-2-rgoldwyn-l3A5Bk7waGM@public.gmane.org>
2017-04-11 14:26 ` [PATCH 2/9] nowait aio: Introduce RWF_NOWAIT Goldwyn Rodrigues
` (7 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
RWF_* flags are used for preadv2/pwritev2 calls. Port them for use
in aio operations as well. For this, aio_rw_flags is
introduced in struct iocb (using aio_reserved1), which will
carry these flags.
This is a precursor to the nowait AIO calls.
Note, the only place RWF_HIPRI comes into effect is dio_await_one().
At all the other locations, the aio code returns -EIOCBQUEUED before the
checks for RWF_HIPRI.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/aio.c | 15 ++++++++++++++-
include/uapi/linux/aio_abi.h | 2 +-
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..7cd1443ad1c8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,11 +1541,17 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
ssize_t ret;
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
+ if (unlikely(iocb->aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
+ if (unlikely(iocb->aio_rw_flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))) {
+ pr_debug("EINVAL: aio_rw_flags set with incompatible flags\n");
+ return -EINVAL;
+ }
+
+
/* prevent overflows */
if (unlikely(
(iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
@@ -1586,6 +1592,13 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
+ if (iocb->aio_rw_flags & RWF_HIPRI)
+ req->common.ki_flags |= IOCB_HIPRI;
+ if (iocb->aio_rw_flags & RWF_DSYNC)
+ req->common.ki_flags |= IOCB_DSYNC;
+ if (iocb->aio_rw_flags & RWF_SYNC)
+ req->common.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f7fbd1..a2d4a8ac94ca 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
struct iocb {
/* these are internal to the kernel/libc. */
__u64 aio_data; /* data to be returned in event's data */
- __u32 PADDED(aio_key, aio_reserved1);
+ __u32 PADDED(aio_key, aio_rw_flags);
/* the kernel sets aio_key to the req # */
/* common fields */
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/9] nowait aio: Introduce RWF_NOWAIT
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 1/9] Use RWF_* flags for AIO operations Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 3/9] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
` (6 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
This flag informs the kernel to bail out if an AIO request would block
for reasons such as file allocations, a triggered writeback,
or blocking while allocating requests while performing
direct I/O.
Unfortunately, aio_flags is not checked for validity, which would
break existing applications which have it set to anything besides zero
or IOCB_FLAG_RESFD. So, we are using aio_reserved1 and renaming it
to aio_rw_flags.
RWF_NOWAIT is translated to IOCB_NOWAIT for iocb->ki_flags.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/aio.c | 12 ++++++++++--
include/linux/fs.h | 1 +
include/uapi/linux/fs.h | 1 +
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 7cd1443ad1c8..948af3729d69 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1546,12 +1546,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return -EINVAL;
}
- if (unlikely(iocb->aio_rw_flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))) {
+ if (unlikely(iocb->aio_rw_flags &
+ ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT))) {
pr_debug("EINVAL: aio_rw_flags set with incompatible flags\n");
return -EINVAL;
}
-
/* prevent overflows */
if (unlikely(
(iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
@@ -1598,6 +1598,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->common.ki_flags |= IOCB_DSYNC;
if (iocb->aio_rw_flags & RWF_SYNC)
req->common.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ if (iocb->aio_rw_flags & RWF_NOWAIT) {
+ if (!(iocb->aio_lio_opcode & IOCB_CMD_PWRITE) ||
+ !(req->common.ki_flags & IOCB_DIRECT)) {
+ ret = -EINVAL;
+ goto out_put_req;
+ }
+ req->common.ki_flags |= IOCB_NOWAIT;
+ }
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7251f7bb45e8..e8d93462529c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -270,6 +270,7 @@ struct writeback_control;
#define IOCB_DSYNC (1 << 4)
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
+#define IOCB_NOWAIT (1 << 7)
struct kiocb {
struct file *ki_filp;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 048a85e9f017..7bcaef101876 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -347,5 +347,6 @@ struct fscrypt_policy {
#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */
#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */
#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */
+#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */
#endif /* _UAPI_LINUX_FS_H */
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/9] nowait aio: return if direct write will trigger writeback
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 1/9] Use RWF_* flags for AIO operations Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 2/9] nowait aio: Introduce RWF_NOWAIT Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 4/9] nowait-aio: Introduce IOMAP_NOWAIT Goldwyn Rodrigues
` (5 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.
This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.
Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
include/linux/fs.h | 2 ++
mm/filemap.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e8d93462529c..4a30e8f3ce11 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2514,6 +2514,8 @@ extern int filemap_fdatawait(struct address_space *);
extern void filemap_fdatawait_keep_errors(struct address_space *);
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+ loff_t lend);
extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index 1694623a6289..46e01b8f6880 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,39 @@ int filemap_flush(struct address_space *mapping)
}
EXPORT_SYMBOL(filemap_flush);
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping: address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
+{
+ pgoff_t index = start_byte >> PAGE_SHIFT;
+ pgoff_t end = end_byte >> PAGE_SHIFT;
+ struct pagevec pvec;
+ int ret;
+
+ if (end_byte < start_byte)
+ return 0;
+
+ if (mapping->nrpages == 0)
+ return 0;
+
+ pagevec_init(&pvec, 0);
+ ret = pagevec_lookup(&pvec, mapping, index, 1);
+ if (!ret)
+ return 0;
+ ret = (pvec.pages[0]->index <= end);
+ pagevec_release(&pvec);
+ return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
static int __filemap_fdatawait_range(struct address_space *mapping,
loff_t start_byte, loff_t end_byte)
{
@@ -2640,6 +2673,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
pos = iocb->ki_pos;
+ if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+ return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2709,9 +2745,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
- written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
- if (written)
- goto out;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* If there are pages to writeback, return */
+ if (filemap_range_has_page(inode->i_mapping, pos,
+ pos + iov_iter_count(from)))
+ return -EAGAIN;
+ } else {
+ written = filemap_write_and_wait_range(mapping, pos,
+ pos + write_len - 1);
+ if (written)
+ goto out;
+ }
/*
* After a write we want buffered reads to be sure to go to disk to get
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/9] nowait-aio: Introduce IOMAP_NOWAIT
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (2 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 3/9] nowait aio: return if direct write will trigger writeback Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 5/9] nowait aio: return on congested block device Goldwyn Rodrigues
` (4 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
IOCB_NOWAIT translates to IOMAP_NOWAIT for iomaps.
This is used by XFS in the XFS patch.
---
fs/iomap.c | 2 ++
include/linux/iomap.h | 1 +
2 files changed, 3 insertions(+)
diff --git a/fs/iomap.c b/fs/iomap.c
index 141c3cd55a8b..d1c81753d411 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -885,6 +885,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
} else {
dio->flags |= IOMAP_DIO_WRITE;
flags |= IOMAP_WRITE;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ flags |= IOMAP_NOWAIT;
}
if (mapping->nrpages) {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 7291810067eb..53f6af89c625 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -51,6 +51,7 @@ struct iomap {
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
#define IOMAP_DIRECT (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
struct iomap_ops {
/*
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/9] nowait aio: return on congested block device
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (3 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 4/9] nowait-aio: Introduce IOMAP_NOWAIT Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-12 8:36 ` Christoph Hellwig
2017-04-11 14:26 ` [PATCH 6/9] nowait aio: ext4 Goldwyn Rodrigues
` (3 subsequent siblings)
8 siblings, 1 reply; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
A new flag BIO_NOWAIT is introduced to identify bios
originating from an iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.
To facilitate this, QUEUE_FLAG_NOWAIT is set on devices
which support this. Currently it is set for
virtio and sd only. Support for more devices will be added soon.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
block/blk-core.c | 24 ++++++++++++++++++++++--
block/blk-mq-sched.c | 3 +++
block/blk-mq.c | 4 ++++
drivers/block/virtio_blk.c | 3 +++
drivers/scsi/sd.c | 3 +++
fs/direct-io.c | 11 +++++++++--
include/linux/bio.h | 6 ++++++
include/linux/blk_types.h | 1 +
include/linux/blkdev.h | 2 ++
9 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d772c221cc17..95a9b18f38a3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,6 +1232,11 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
if (!IS_ERR(rq))
return rq;
+ if (bio && bio_flagged(bio, BIO_NOWAIT)) {
+ blk_put_rl(rl);
+ return ERR_PTR(-EAGAIN);
+ }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
blk_put_rl(rl);
return rq;
@@ -1870,6 +1875,18 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
+ if (bio_flagged(bio, BIO_NOWAIT)) {
+ if (!blk_queue_nowait(q)) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
+ if (!(bio->bi_opf & (REQ_SYNC | REQ_IDLE))) {
+ err = -EINVAL;
+ goto end_io;
+ }
+ }
+
+
part = bio->bi_bdev->bd_part;
if (should_fail_request(part, bio->bi_iter.bi_size) ||
should_fail_request(&part_to_disk(part)->part0,
@@ -2021,7 +2038,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (likely(blk_queue_enter(q, false) == 0)) {
+ if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
@@ -2046,7 +2063,10 @@ blk_qc_t generic_make_request(struct bio *bio)
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
- bio_io_error(bio);
+ if (unlikely(!blk_queue_dying(q) && bio_flagged(bio, BIO_NOWAIT)))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c974a1bbf4cb..c0d3bbf293ec 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -119,6 +119,9 @@ struct request *blk_mq_sched_get_request(struct request_queue *q,
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
+ if (likely(bio) && bio_flagged(bio, BIO_NOWAIT))
+ data->flags |= BLK_MQ_REQ_NOWAIT;
+
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 572966f49596..f20e802b0e15 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1538,6 +1538,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio && bio_flagged(bio, BIO_NOWAIT))
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
@@ -1662,6 +1664,8 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
+ if (bio && bio_flagged(bio, BIO_NOWAIT))
+ bio_wouldblock_error(bio);
return BLK_QC_T_NONE;
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1d4c9f8bc1e1..7481124c5025 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -731,6 +731,9 @@ static int virtblk_probe(struct virtio_device *vdev)
/* No real sector limit. */
blk_queue_max_hw_sectors(q, -1U);
+ /* Request queue supports BIO_NOWAIT */
+ queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, q);
+
/* Host can optionally specify maximum segment size and number of
* segments. */
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc79331..9df85ee165be 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3177,6 +3177,9 @@ static int sd_probe(struct device *dev)
SD_MOD_TIMEOUT);
}
+ /* Support BIO_NOWAIT */
+ queue_flag_set_unlocked(QUEUE_FLAG_NOWAIT, sdp->request_queue);
+
device_initialize(&sdkp->dev);
sdkp->dev.parent = dev;
sdkp->dev.class = &sd_disk_class;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..f6835d3d5fe2 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,6 +386,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
+ if (dio->iocb->ki_flags & IOCB_NOWAIT)
+ bio_set_flag(bio, BIO_NOWAIT);
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
@@ -480,8 +483,12 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
unsigned i;
int err;
- if (bio->bi_error)
- dio->io_error = -EIO;
+ if (bio->bi_error) {
+ if (bio_flagged(bio, BIO_NOWAIT))
+ dio->io_error = -EAGAIN;
+ else
+ dio->io_error = -EIO;
+ }
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8e521194f6fc..1a9270744b1e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -425,6 +425,12 @@ static inline void bio_io_error(struct bio *bio)
bio_endio(bio);
}
+static inline void bio_wouldblock_error(struct bio *bio)
+{
+ bio->bi_error = -EAGAIN;
+ bio_endio(bio);
+}
+
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d703acb55d0f..514c08e8af78 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -102,6 +102,7 @@ struct bio {
#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */
#define BIO_THROTTLED 9 /* This bio has already been subjected to
* throttling rules. Don't do it again. */
+#define BIO_NOWAIT 10 /* don't block over blk device congestion */
/*
* Flags starting here get preserved by bio_reset() - this includes
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7548f332121a..2663918f12ce 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -610,6 +610,7 @@ struct request_queue {
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
+#define QUEUE_FLAG_NOWAIT 28 /* queue supports BIO_NOWAIT */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -700,6 +701,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 5/9] nowait aio: return on congested block device
2017-04-11 14:26 ` [PATCH 5/9] nowait aio: return on congested block device Goldwyn Rodrigues
@ 2017-04-12 8:36 ` Christoph Hellwig
2017-04-12 18:31 ` Goldwyn Rodrigues
0 siblings, 1 reply; 14+ messages in thread
From: Christoph Hellwig @ 2017-04-12 8:36 UTC (permalink / raw)
To: Goldwyn Rodrigues
Cc: linux-fsdevel, jack, hch, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, willy, tom.leiming,
Goldwyn Rodrigues
As mentioned last time around, this should be a REQ_NOWAIT flag so
that it can be easily passed down to the request layer.
> +static inline void bio_wouldblock_error(struct bio *bio)
> +{
> + bio->bi_error = -EAGAIN;
> + bio_endio(bio);
> +}
Please skip this helper..
> +#define QUEUE_FLAG_NOWAIT 28 /* queue supports BIO_NOWAIT */
Please make the flag name a little more descriptive, this sounds like
it will never wait.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 5/9] nowait aio: return on congested block device
2017-04-12 8:36 ` Christoph Hellwig
@ 2017-04-12 18:31 ` Goldwyn Rodrigues
0 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-12 18:31 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-fsdevel, jack, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, willy, tom.leiming,
Goldwyn Rodrigues
On 04/12/2017 03:36 AM, Christoph Hellwig wrote:
> As mentioned last time around, this should be a REQ_NOWAIT flag so
> that it can be easily passed down to the request layer.
>
>> +static inline void bio_wouldblock_error(struct bio *bio)
>> +{
>> + bio->bi_error = -EAGAIN;
>> + bio_endio(bio);
>> +}
>
> Please skip this helper..
Why? It is being called three times?
I am incorporating all the rest of the comments, besides this one. Thanks.
>
>> +#define QUEUE_FLAG_NOWAIT 28 /* queue supports BIO_NOWAIT */
>
> Please make the flag name a little more descriptive, this sounds like
> it will never wait.
>
--
Goldwyn
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 6/9] nowait aio: ext4
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (4 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 5/9] nowait aio: return on congested block device Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 7/9] nowait aio: xfs Goldwyn Rodrigues
` (2 subsequent siblings)
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Return EAGAIN if any of the following conditions holds for direct I/O:
+ i_rwsem cannot be locked immediately
+ Writing beyond end of file (will trigger allocation)
+ Blocks are not allocated at the write location
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/ext4/file.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index cefa9835f275..2efdc6d4d3e8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -216,7 +216,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_dax_write_iter(iocb, from);
#endif
- inode_lock(inode);
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -235,9 +241,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
- if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
- ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
- overwrite = 1;
+ if (o_direct && !unaligned_aio) {
+ if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+ if (ext4_should_dioread_nolock(inode))
+ overwrite = 1;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 7/9] nowait aio: xfs
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (5 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 6/9] nowait aio: ext4 Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 8/9] nowait aio: btrfs Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 9/9] nowait aio: Return -EOPNOTSUPP if filesystem does not support Goldwyn Rodrigues
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
If IOCB_NOWAIT is set, bail if the i_rwsem is not lockable
immediately.
If IOMAP_NOWAIT is set, return EAGAIN in xfs_file_iomap_begin
if it needs allocation either due to file extension, writing to a hole,
or COW or waiting for other DIOs to finish.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/xfs/xfs_file.c | 19 ++++++++++++++-----
fs/xfs/xfs_iomap.c | 17 +++++++++++++++++
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 35703a801372..b307940e7d56 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -541,8 +541,11 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
- xfs_ilock(ip, iolock);
-
+ if (!xfs_ilock_nowait(ip, iolock)) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EAGAIN;
+ xfs_ilock(ip, iolock);
+ }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
@@ -553,9 +556,15 @@ xfs_file_dio_aio_write(
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
- if (unaligned_io)
- inode_dio_wait(inode);
- else if (iolock == XFS_IOLOCK_EXCL) {
+ if (unaligned_io) {
+ /* If we are going to wait for other DIO to finish, bail */
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (atomic_read(&inode->i_dio_count))
+ return -EAGAIN;
+ } else {
+ inode_dio_wait(inode);
+ }
+ } else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 288ee5b840d7..9baa65eeae9e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1015,6 +1015,15 @@ xfs_file_iomap_begin(
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
if (flags & IOMAP_DIRECT) {
+ /*
+ * A reflinked inode will result in CoW alloc.
+ * FIXME: It could still overwrite on unshared extents
+ * and not need allocation.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, &imap, &shared,
&lockmode);
@@ -1032,6 +1041,14 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
/*
+ * If nowait is set bail since we are going to make
+ * allocations.
+ */
+ if (flags & IOMAP_NOWAIT) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+ /*
* We cap the maximum length we map here to MAX_WRITEBACK_PAGES
* pages to keep the chunks of work done where somewhat symmetric
* with the work writeback does. This is a completely arbitrary
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 8/9] nowait aio: btrfs
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (6 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 7/9] nowait aio: xfs Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-11 14:26 ` [PATCH 9/9] nowait aio: Return -EOPNOTSUPP if filesystem does not support Goldwyn Rodrigues
8 siblings, 0 replies; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Return EAGAIN if any of the following conditions holds:
+ i_rwsem is not lockable
+ Neither NODATACOW nor PREALLOC is set
+ Cannot nocow at the desired location
+ Writing beyond end of file which is not allocated
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/btrfs/file.c | 25 ++++++++++++++++++++-----
fs/btrfs/inode.c | 3 +++
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 520cb7230b2d..a870e5dd2b4d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1823,12 +1823,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
- loff_t pos;
- size_t count;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
- inode_lock(inode);
+ if ((iocb->ki_flags & IOCB_NOWAIT) &&
+ (iocb->ki_flags & IOCB_DIRECT)) {
+ /* Don't sleep on inode rwsem */
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ /*
+ * We will allocate space in case nodatacow is not set,
+ * so bail
+ */
+ if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+ check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
+ inode_unlock(inode);
+ return -EAGAIN;
+ }
+ } else
+ inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
@@ -1862,8 +1879,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
- pos = iocb->ki_pos;
- count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a18510be76c1..d91b21a76d6d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8627,6 +8627,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
+ } else if (iocb->ki_flags & IOCB_NOWAIT) {
+ ret = -EAGAIN;
+ goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 9/9] nowait aio: Return -EOPNOTSUPP if filesystem does not support
2017-04-11 14:26 [PATCH 0/9 v5] No wait AIO Goldwyn Rodrigues
` (7 preceding siblings ...)
2017-04-11 14:26 ` [PATCH 8/9] nowait aio: btrfs Goldwyn Rodrigues
@ 2017-04-11 14:26 ` Goldwyn Rodrigues
2017-04-12 8:37 ` Christoph Hellwig
8 siblings, 1 reply; 14+ messages in thread
From: Goldwyn Rodrigues @ 2017-04-11 14:26 UTC (permalink / raw)
To: linux-fsdevel
Cc: jack, hch, linux-block, linux-btrfs, linux-ext4, linux-xfs, sagi,
avi, axboe, linux-api, willy, tom.leiming, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
The check is in generic_file_write_iter(), which is called by
most filesystems, either through fsops.write_iter() or through
the function defined by write_iter(). If not, we perform the
check in the defined .write_iter() function which is called
for direct IO.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
fs/9p/vfs_file.c | 3 +++
fs/ceph/file.c | 3 +++
fs/cifs/file.c | 3 +++
fs/fuse/file.c | 3 +++
fs/nfs/direct.c | 3 +++
fs/ocfs2/file.c | 3 +++
mm/filemap.c | 3 +++
7 files changed, 21 insertions(+)
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3de3b4a89d89..403681db7723 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -411,6 +411,9 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t origin;
int err = 0;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
retval = generic_write_checks(iocb, from);
if (retval <= 0)
return retval;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 26cc95421cca..af28419b1731 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1267,6 +1267,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
int err, want, got;
loff_t pos;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index aa3debbba826..a828ab3e7775 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2638,6 +2638,9 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
* write request.
*/
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
rc = generic_write_checks(iocb, from);
if (rc <= 0)
return rc;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ec238fb5a584..72786e798319 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1425,6 +1425,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
ssize_t res;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
if (is_bad_inode(inode))
return -EIO;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aab32fc3d6a8..ab419caebd5f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -991,6 +991,9 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, iov_iter_count(iter), (long long) iocb->ki_pos);
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
result = generic_write_checks(iocb, iter);
if (result <= 0)
return result;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bfeb647459d9..e7f8ba890305 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2235,6 +2235,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
if (count == 0)
return 0;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
inode_lock(inode);
diff --git a/mm/filemap.c b/mm/filemap.c
index 46e01b8f6880..48b83d1d4a30 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3026,6 +3026,9 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
ssize_t ret;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0)
--
2.12.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 9/9] nowait aio: Return -EOPNOTSUPP if filesystem does not support
2017-04-11 14:26 ` [PATCH 9/9] nowait aio: Return -EOPNOTSUPP if filesystem does not support Goldwyn Rodrigues
@ 2017-04-12 8:37 ` Christoph Hellwig
0 siblings, 0 replies; 14+ messages in thread
From: Christoph Hellwig @ 2017-04-12 8:37 UTC (permalink / raw)
To: Goldwyn Rodrigues
Cc: linux-fsdevel, jack, hch, linux-block, linux-btrfs, linux-ext4,
linux-xfs, sagi, avi, axboe, linux-api, willy, tom.leiming,
Goldwyn Rodrigues
This should go into the patch that introduces IOCB_NOWAIT.
^ permalink raw reply [flat|nested] 14+ messages in thread