* [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions
2019-11-20 9:45 [PATCH V2 0/2] ovl: implement async IO routines Jiufei Xue
@ 2019-11-20 9:45 ` Jiufei Xue
2019-11-20 9:45 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Jiufei Xue @ 2019-11-20 9:45 UTC (permalink / raw)
To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel
This doesn't cause any behavior changes and will be used by the overlayfs
async IO implementation.
Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
fs/read_write.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 16 +++++++++++++++
2 files changed, 74 insertions(+)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5bbf587..3dfbcec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -984,6 +984,64 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
}
EXPORT_SYMBOL(vfs_iter_write);
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ ssize_t ret = 0;
+ ssize_t tot_len;
+
+ if (!file->f_op->read_iter)
+ return -EINVAL;
+ if (!(file->f_mode & FMODE_READ))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_READ))
+ return -EINVAL;
+
+ tot_len = iov_iter_count(iter);
+ if (!tot_len)
+ return 0;
+
+ ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
+ if (ret < 0)
+ return ret;
+
+ ret = call_read_iter(file, iocb, iter);
+ if (ret >= 0)
+ fsnotify_access(file);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_read);
+
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ ssize_t ret = 0;
+ ssize_t tot_len;
+
+ if (!file->f_op->write_iter)
+ return -EINVAL;
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EBADF;
+ if (!(file->f_mode & FMODE_CAN_WRITE))
+ return -EINVAL;
+
+ tot_len = iov_iter_count(iter);
+ if (!tot_len)
+ return 0;
+
+ ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
+ if (ret < 0)
+ return ret;
+
+ ret = call_write_iter(file, iocb, iter);
+ if (ret >= 0)
+ fsnotify_modify(file);
+
+ return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_write);
+
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0d909d..c885279 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,6 +2071,18 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
};
}
+static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+ struct file *filp)
+{
+ *kiocb = (struct kiocb) {
+ .ki_filp = filp,
+ .ki_flags = kiocb_src->ki_flags,
+ .ki_hint = ki_hint_validate(file_write_hint(filp)),
+ .ki_ioprio = kiocb_src->ki_ioprio,
+ .ki_pos = kiocb_src->ki_pos,
+ };
+}
+
/*
* Inode state bits. Protected by inode->i_lock
*
@@ -3105,6 +3117,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags);
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+ struct iov_iter *iter);
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+ struct iov_iter *iter);
/* fs/block_dev.c */
extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] ovl: implement async IO routines
2019-11-20 9:45 [PATCH V2 0/2] ovl: implement async IO routines Jiufei Xue
2019-11-20 9:45 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
@ 2019-11-20 9:45 ` Jiufei Xue
2019-11-26 2:00 ` [PATCH V2 0/2] " Jiufei Xue
2020-01-24 12:10 ` Miklos Szeredi
3 siblings, 0 replies; 6+ messages in thread
From: Jiufei Xue @ 2019-11-20 9:45 UTC (permalink / raw)
To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel
A performance regression is observed since linux v4.19 when we do aio
test using fio with iodepth 128 on overlayfs. And we found that queue
depth of the device is always 1 which is unexpected.
After investigation, it is found that commit 16914e6fc7e1
("ovl: add ovl_read_iter()") and commit 2a92e07edc5e
("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
requests to real filesystem. Async IOs are converted to sync IOs here
and cause performance regression.
So implement async IO for stacked reading and writing.
Changes since v1:
- add a cleanup helper for completion/error handling
- handle the case when aio_req allocation failed
Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
fs/overlayfs/file.c | 116 +++++++++++++++++++++++++++++++++++++++++------
fs/overlayfs/overlayfs.h | 2 +
fs/overlayfs/super.c | 12 ++++-
3 files changed, 114 insertions(+), 16 deletions(-)
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a63..8ba000f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -11,6 +11,14 @@
#include <linux/uaccess.h>
#include "overlayfs.h"
+struct ovl_aio_req {
+ struct kiocb iocb;
+ struct kiocb *orig_iocb;
+ struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ovl_inode_upper(inode))
@@ -225,6 +233,33 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
return flags;
}
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+ file_end_write(iocb->ki_filp);
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ }
+
+ orig_iocb->ki_pos = iocb->ki_pos;
+ fdput(aio_req->fd);
+ kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct ovl_aio_req *aio_req = container_of(iocb,
+ struct ovl_aio_req, iocb);
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ ovl_aio_cleanup_handler(aio_req);
+ orig_iocb->ki_complete(orig_iocb, res, res2);
+}
+
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -240,14 +275,32 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ if (is_sync_kiocb(iocb)) {
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ ovl_file_accessed(file);
+ fdput(real);
+ } else {
+ struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+ GFP_NOFS);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ fdput(real);
+ goto out_revert;
+ }
+
+ aio_req->fd = real;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+ ovl_file_accessed(file);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out_revert:
revert_creds(old_cred);
- ovl_file_accessed(file);
-
- fdput(real);
-
return ret;
}
@@ -275,15 +328,34 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- file_end_write(real.file);
- revert_creds(old_cred);
-
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
+ if (is_sync_kiocb(iocb)) {
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb));
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ fdput(real);
+ } else {
+ struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+ GFP_NOFS);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ file_end_write(real.file);
+ fdput(real);
+ goto out_revert;
+ }
+
+ aio_req->fd = real;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
- fdput(real);
+out_revert:
+ revert_creds(old_cred);
out_unlock:
inode_unlock(inode);
@@ -651,3 +723,19 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
};
+
+int __init ovl_init_aio_request_cache(void)
+{
+ ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+ sizeof(struct ovl_aio_req),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ovl_aio_request_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ovl_exit_aio_request_cache(void)
+{
+ kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf..afd1631 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -416,6 +416,8 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
/* file.c */
extern const struct file_operations ovl_file_operations;
+int __init ovl_init_aio_request_cache(void);
+void ovl_exit_aio_request_cache(void);
/* copy_up.c */
int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index afbcb11..83cef1f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1739,9 +1739,17 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL)
return -ENOMEM;
+ err = ovl_init_aio_request_cache();
+ if (err) {
+ kmem_cache_destroy(ovl_inode_cachep);
+ return -ENOMEM;
+ }
+
err = register_filesystem(&ovl_fs_type);
- if (err)
+ if (err) {
kmem_cache_destroy(ovl_inode_cachep);
+ ovl_exit_aio_request_cache();
+ }
return err;
}
@@ -1756,7 +1764,7 @@ static void __exit ovl_exit(void)
*/
rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep);
-
+ ovl_exit_aio_request_cache();
}
module_init(ovl_init);
--
1.8.3.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH V2 0/2] ovl: implement async IO routines
2019-11-20 9:45 [PATCH V2 0/2] ovl: implement async IO routines Jiufei Xue
2019-11-20 9:45 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
2019-11-20 9:45 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
@ 2019-11-26 2:00 ` Jiufei Xue
2019-12-06 3:54 ` Jiufei Xue
2020-01-24 12:10 ` Miklos Szeredi
3 siblings, 1 reply; 6+ messages in thread
From: Jiufei Xue @ 2019-11-26 2:00 UTC (permalink / raw)
To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel
Hi miklos,
Could you please kindly review this patch and give some advice?
Thanks,
Jiufei
On 2019/11/20 下午5:45, Jiufei Xue wrote:
> ovl stacks regular file operations now. However it doesn't implement
> async IO routines and will convert async IOs to sync IOs which is not
> expected.
>
> This patchset implements overlayfs async IO routines.
>
> Jiufei Xue (2)
> vfs: add vfs_iocb_iter_[read|write] helper functions
> ovl: implement async IO routines
>
> fs/overlayfs/file.c | 116 +++++++++++++++++++++++++++++++++++++++++------
> fs/overlayfs/overlayfs.h | 2
> fs/overlayfs/super.c | 12 ++++
> fs/read_write.c | 58 +++++++++++++++++++++++
> include/linux/fs.h | 16 ++++++
> 5 files changed, 188 insertions(+), 16 deletions(-)
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH V2 0/2] ovl: implement async IO routines
2019-11-26 2:00 ` [PATCH V2 0/2] " Jiufei Xue
@ 2019-12-06 3:54 ` Jiufei Xue
0 siblings, 0 replies; 6+ messages in thread
From: Jiufei Xue @ 2019-12-06 3:54 UTC (permalink / raw)
To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel
ping again.
From my test, the patchset can improve the performance significantly.
The following data are tested on INTEL P4510 NVMe using fio with iodepth
128 and blocksize 4k.
----------------------------------------------------------------
| RANDREAD | RANDWRITE |
----------------------------------------------------------------
w/ async IO routines | 377MB/s | 405MB/s |
----------------------------------------------------------------
w/o async IO routines | 32.0MB/s | 62.3MB/s |
----------------------------------------------------------------
Regards,
Jiufei
On 2019/11/26 上午10:00, Jiufei Xue wrote:
> Hi miklos,
>
> Could you please kindly review this patch and give some advice?
>
> Thanks,
> Jiufei
>
> On 2019/11/20 下午5:45, Jiufei Xue wrote:
>> ovl stacks regular file operations now. However it doesn't implement
>> async IO routines and will convert async IOs to sync IOs which is not
>> expected.
>>
>> This patchset implements overlayfs async IO routines.
>>
>> Jiufei Xue (2)
>> vfs: add vfs_iocb_iter_[read|write] helper functions
>> ovl: implement async IO routines
>>
>> fs/overlayfs/file.c | 116 +++++++++++++++++++++++++++++++++++++++++------
>> fs/overlayfs/overlayfs.h | 2
>> fs/overlayfs/super.c | 12 ++++
>> fs/read_write.c | 58 +++++++++++++++++++++++
>> include/linux/fs.h | 16 ++++++
>> 5 files changed, 188 insertions(+), 16 deletions(-)
>>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH V2 0/2] ovl: implement async IO routines
2019-11-20 9:45 [PATCH V2 0/2] ovl: implement async IO routines Jiufei Xue
` (2 preceding siblings ...)
2019-11-26 2:00 ` [PATCH V2 0/2] " Jiufei Xue
@ 2020-01-24 12:10 ` Miklos Szeredi
3 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2020-01-24 12:10 UTC (permalink / raw)
To: Jiufei Xue; +Cc: Amir Goldstein, overlayfs, linux-fsdevel
On Wed, Nov 20, 2019 at 10:45 AM Jiufei Xue
<jiufei.xue@linux.alibaba.com> wrote:
>
> ovl stacks regular file operations now. However it doesn't implement
> async IO routines and will convert async IOs to sync IOs which is not
> expected.
>
> This patchset implements overlayfs async IO routines.
Thanks, pushed to overlayfs-next with slight modifications.
Miklos
^ permalink raw reply [flat|nested] 6+ messages in thread