* [PATCH 0/2] ovl: implement async IO routines
@ 2019-11-19  2:14 Jiufei Xue
  2019-11-19  2:14 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
  2019-11-19  2:14 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
  0 siblings, 2 replies; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19  2:14 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel

ovl now stacks regular file operations, but it doesn't implement async IO
routines, so async IOs are converted to sync IOs, which is not expected.

This patchset implements overlayfs async IO routines.

Jiufei Xue (2):
vfs: add vfs_iocb_iter_[read|write] helper functions
ovl: implement async IO routines

 fs/overlayfs/file.c      |   97 +++++++++++++++++++++++++++++++++++++++------
 fs/overlayfs/overlayfs.h |    2 +
 fs/overlayfs/super.c     |   12 +++++-
 fs/read_write.c          |   58 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h       |   16 ++++++++
 5 files changed, 169 insertions(+), 16 deletions(-)


* [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions
  2019-11-19  2:14 [PATCH 0/2] ovl: implement async IO routines Jiufei Xue
@ 2019-11-19  2:14 ` Jiufei Xue
  2019-11-19  3:14   ` Amir Goldstein
  2019-11-19  2:14 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
  1 sibling, 1 reply; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19  2:14 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel

This doesn't cause any behavior changes and will be used by the overlayfs
async IO implementation.
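
For illustration, a minimal sketch of how a stacking filesystem is expected
to use these helpers (the function below and its "realfile" argument are
made up for this example, they are not part of the patch):

static ssize_t example_stacked_read(struct kiocb *iocb, struct iov_iter *iter,
				    struct file *realfile)
{
	struct kiocb real_iocb;
	ssize_t ret;

	/* Copy pos, flags, hint and ioprio from the caller's iocb */
	kiocb_clone(&real_iocb, iocb, realfile);

	/*
	 * With no ki_complete callback the cloned iocb is synchronous;
	 * an async caller would allocate it with a longer lifetime and
	 * install a completion handler (see patch 2/2).
	 */
	ret = vfs_iocb_iter_read(realfile, &real_iocb, iter);
	if (ret >= 0)
		iocb->ki_pos = real_iocb.ki_pos;

	return ret;
}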

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/read_write.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h | 16 +++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5bbf587..3dfbcec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -984,6 +984,64 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 }
 EXPORT_SYMBOL(vfs_iter_write);
 
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+			   struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	ssize_t tot_len;
+
+	if (!file->f_op->read_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_READ))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		return 0;
+
+	ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_read_iter(file, iocb, iter);
+	if (ret >= 0)
+		fsnotify_access(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_read);
+
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+			    struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	ssize_t tot_len;
+
+	if (!file->f_op->write_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_WRITE))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		return 0;
+
+	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_write_iter(file, iocb, iter);
+	if (ret >= 0)
+		fsnotify_modify(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_write);
+
 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		  unsigned long vlen, loff_t *pos, rwf_t flags)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0d909d..c885279 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,6 +2071,18 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 	};
 }
 
+static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+			       struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+		.ki_flags = kiocb_src->ki_flags,
+		.ki_hint = ki_hint_validate(file_write_hint(filp)),
+		.ki_ioprio = kiocb_src->ki_ioprio,
+		.ki_pos = kiocb_src->ki_pos,
+	};
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -3105,6 +3117,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+			   struct iov_iter *iter);
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+			    struct iov_iter *iter);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
-- 
1.8.3.1


* [PATCH 2/2] ovl: implement async IO routines
  2019-11-19  2:14 [PATCH 0/2] ovl: implement async IO routines Jiufei Xue
  2019-11-19  2:14 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
@ 2019-11-19  2:14 ` Jiufei Xue
  2019-11-19  4:22   ` Amir Goldstein
  1 sibling, 1 reply; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19  2:14 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel

A performance regression has been observed since Linux v4.19 when running
aio tests with fio at iodepth 128 on overlayfs: the queue depth of the
device stays at 1, which is unexpected.

After investigation, we found that commit 16914e6fc7
("ovl: add ovl_read_iter()") and commit 2a92e07edc
("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
requests to the real filesystem. Async IOs are converted to sync IOs
there, causing the performance regression.

So implement async IO for stacked reading and writing.
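
For reference, the old path serializes AIO roughly as follows (a paraphrased
sketch of do_iter_readv_writev() from that era, not verbatim kernel code):

static ssize_t sketch_iter_readv_writev(struct file *filp, struct iov_iter *iter,
					loff_t *ppos, rwf_t flags)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);		/* the caller's async context is lost here */
	ret = kiocb_set_rw_flags(&kiocb, flags);
	if (ret)
		return ret;
	kiocb.ki_pos = *ppos;

	ret = call_read_iter(filp, &kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);		/* a sync kiocb is never queued */
	if (ret > 0)
		*ppos = kiocb.ki_pos;

	return ret;
}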

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
 fs/overlayfs/overlayfs.h |  2 +
 fs/overlayfs/super.c     | 12 +++++-
 3 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a63..07d94e7 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -11,6 +11,14 @@
 #include <linux/uaccess.h>
 #include "overlayfs.h"
 
+struct ovl_aio_req {
+	struct kiocb iocb;
+	struct kiocb *orig_iocb;
+	struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 {
 	if (realinode != ovl_inode_upper(inode))
@@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 	return flags;
 }
 
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	if (iocb->ki_flags & IOCB_WRITE)
+		file_end_write(iocb->ki_filp);
+
+	orig_iocb->ki_pos = iocb->ki_pos;
+	orig_iocb->ki_complete(orig_iocb, res, res2);
+
+	fdput(aio_req->fd);
+	kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		return ret;
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
-			    ovl_iocb_to_rwf(iocb));
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+				    ovl_iocb_to_rwf(iocb));
+		ovl_file_accessed(file);
+		fdput(real);
+	} else {
+		struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+							       GFP_NOFS);
+		aio_req->fd = real;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+		ovl_file_accessed(file);
+		if (ret != -EIOCBQUEUED) {
+			iocb->ki_pos = aio_req->iocb.ki_pos;
+			fdput(real);
+			kmem_cache_free(ovl_aio_request_cachep, aio_req);
+		}
+	}
 	revert_creds(old_cred);
 
-	ovl_file_accessed(file);
-
-	fdput(real);
-
 	return ret;
 }
 
@@ -275,16 +312,32 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	file_start_write(real.file);
-	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
-			     ovl_iocb_to_rwf(iocb));
-	file_end_write(real.file);
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+				     ovl_iocb_to_rwf(iocb));
+		file_end_write(real.file);
+		/* Update size */
+		ovl_copyattr(ovl_inode_real(inode), inode);
+		fdput(real);
+	} else {
+		struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+							       GFP_NOFS);
+		aio_req->fd = real;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+		/* Update size */
+		ovl_copyattr(ovl_inode_real(inode), inode);
+		if (ret != -EIOCBQUEUED) {
+			iocb->ki_pos = aio_req->iocb.ki_pos;
+			file_end_write(real.file);
+			fdput(real);
+			kmem_cache_free(ovl_aio_request_cachep, aio_req);
+		}
+	}
 	revert_creds(old_cred);
 
-	/* Update size */
-	ovl_copyattr(ovl_inode_real(inode), inode);
-
-	fdput(real);
-
 out_unlock:
 	inode_unlock(inode);
 
@@ -651,3 +704,19 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 	.copy_file_range	= ovl_copy_file_range,
 	.remap_file_range	= ovl_remap_file_range,
 };
+
+int __init ovl_init_aio_request_cache(void)
+{
+	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+						   sizeof(struct ovl_aio_req),
+						   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ovl_aio_request_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ovl_exit_aio_request_cache(void)
+{
+	kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf..afd1631 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -416,6 +416,8 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
 
 /* file.c */
 extern const struct file_operations ovl_file_operations;
+int __init ovl_init_aio_request_cache(void);
+void ovl_exit_aio_request_cache(void);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index afbcb11..83cef1f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1739,9 +1739,17 @@ static int __init ovl_init(void)
 	if (ovl_inode_cachep == NULL)
 		return -ENOMEM;
 
+	err = ovl_init_aio_request_cache();
+	if (err) {
+		kmem_cache_destroy(ovl_inode_cachep);
+		return -ENOMEM;
+	}
+
 	err = register_filesystem(&ovl_fs_type);
-	if (err)
+	if (err) {
 		kmem_cache_destroy(ovl_inode_cachep);
+		ovl_exit_aio_request_cache();
+	}
 
 	return err;
 }
@@ -1756,7 +1764,7 @@ static void __exit ovl_exit(void)
 	 */
 	rcu_barrier();
 	kmem_cache_destroy(ovl_inode_cachep);
-
+	ovl_exit_aio_request_cache();
 }
 
 module_init(ovl_init);
-- 
1.8.3.1


* Re: [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions
  2019-11-19  2:14 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
@ 2019-11-19  3:14   ` Amir Goldstein
  2019-11-19  8:40     ` Jiufei Xue
  0 siblings, 1 reply; 11+ messages in thread
From: Amir Goldstein @ 2019-11-19  3:14 UTC (permalink / raw)
  To: Jiufei Xue; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel

On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
>
> This doesn't cause any behavior changes and will be used by the overlayfs
> async IO implementation.
>
> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> ---
>  fs/read_write.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/fs.h | 16 +++++++++++++++
>  2 files changed, 74 insertions(+)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 5bbf587..3dfbcec 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -984,6 +984,64 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
>  }
>  EXPORT_SYMBOL(vfs_iter_write);
>
> +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
> +                          struct iov_iter *iter)
> +{
> +       ssize_t ret = 0;
> +       ssize_t tot_len;
> +
> +       if (!file->f_op->read_iter)
> +               return -EINVAL;
> +       if (!(file->f_mode & FMODE_READ))
> +               return -EBADF;
> +       if (!(file->f_mode & FMODE_CAN_READ))
> +               return -EINVAL;
> +
> +       tot_len = iov_iter_count(iter);
> +       if (!tot_len)
> +               return 0;
> +
> +       ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
> +       if (ret < 0)
> +               return ret;
> +
> +       ret = call_read_iter(file, iocb, iter);
> +       if (ret >= 0)
> +               fsnotify_access(file);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL(vfs_iocb_iter_read);
> +
> +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
> +                           struct iov_iter *iter)
> +{
> +       ssize_t ret = 0;
> +       ssize_t tot_len;
> +
> +       if (!file->f_op->write_iter)
> +               return -EINVAL;
> +       if (!(file->f_mode & FMODE_WRITE))
> +               return -EBADF;
> +       if (!(file->f_mode & FMODE_CAN_WRITE))
> +               return -EINVAL;
> +
> +       tot_len = iov_iter_count(iter);
> +       if (!tot_len)
> +               return 0;
> +
> +       ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
> +       if (ret < 0)
> +               return ret;
> +
> +       ret = call_write_iter(file, iocb, iter);
> +       if (ret >= 0)
> +               fsnotify_modify(file);
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL(vfs_iocb_iter_write);
> +

If it was up to me, I would pass down an optional iocb pointer
to the do_iter_XXX static helpers, instead of duplicating the code.
Others may find your approach cleaner, so let's see what other
people think.
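
Roughly something like this (an untested sketch just to illustrate the idea,
not a real patch):

static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
			    loff_t *ppos, rwf_t flags, struct kiocb *iocb)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(READ, file, iocb ? &iocb->ki_pos : ppos, tot_len);
	if (ret < 0)
		return ret;

	if (iocb)
		ret = call_read_iter(file, iocb, iter);	/* honour the caller's (possibly async) iocb */
	else
		ret = do_iter_readv_writev(file, iter, ppos, READ, flags);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}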

Thanks,
Amir.


* Re: [PATCH 2/2] ovl: implement async IO routines
  2019-11-19  2:14 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
@ 2019-11-19  4:22   ` Amir Goldstein
  2019-11-19  8:37     ` Jiufei Xue
  0 siblings, 1 reply; 11+ messages in thread
From: Amir Goldstein @ 2019-11-19  4:22 UTC (permalink / raw)
  To: Jiufei Xue; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel

On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
>
> A performance regression has been observed since Linux v4.19 when running
> aio tests with fio at iodepth 128 on overlayfs: the queue depth of the
> device stays at 1, which is unexpected.
>
> After investigation, we found that commit 16914e6fc7
> ("ovl: add ovl_read_iter()") and commit 2a92e07edc
> ("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
> requests to the real filesystem. Async IOs are converted to sync IOs
> there, causing the performance regression.
>
> So implement async IO for stacked reading and writing.
>
> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> ---
>  fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
>  fs/overlayfs/overlayfs.h |  2 +
>  fs/overlayfs/super.c     | 12 +++++-
>  3 files changed, 95 insertions(+), 16 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index e235a63..07d94e7 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -11,6 +11,14 @@
>  #include <linux/uaccess.h>
>  #include "overlayfs.h"
>
> +struct ovl_aio_req {
> +       struct kiocb iocb;
> +       struct kiocb *orig_iocb;
> +       struct fd fd;
> +};
> +
> +static struct kmem_cache *ovl_aio_request_cachep;
> +
>  static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>  {
>         if (realinode != ovl_inode_upper(inode))
> @@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
>         return flags;
>  }
>
> +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
> +{
> +       struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
> +
> +       if (iocb->ki_flags & IOCB_WRITE)
> +               file_end_write(iocb->ki_filp);
> +
> +       orig_iocb->ki_pos = iocb->ki_pos;
> +       orig_iocb->ki_complete(orig_iocb, res, res2);
> +
> +       fdput(aio_req->fd);
> +       kmem_cache_free(ovl_aio_request_cachep, aio_req);
> +}
> +
>  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>  {
>         struct file *file = iocb->ki_filp;
> @@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>                 return ret;
>
>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
> -       ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
> -                           ovl_iocb_to_rwf(iocb));
> +       if (is_sync_kiocb(iocb)) {
> +               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
> +                                   ovl_iocb_to_rwf(iocb));
> +               ovl_file_accessed(file);
> +               fdput(real);
> +       } else {
> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
> +                                                              GFP_NOFS);
> +               aio_req->fd = real;
> +               aio_req->orig_iocb = iocb;
> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
> +               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
> +               ovl_file_accessed(file);

That should be done in completion/error

> +               if (ret != -EIOCBQUEUED) {
> +                       iocb->ki_pos = aio_req->iocb.ki_pos;
> +                       fdput(real);
> +                       kmem_cache_free(ovl_aio_request_cachep, aio_req);
> +               }

Suggest cleanup helper for completion/error


> +       }
>         revert_creds(old_cred);
>
> -       ovl_file_accessed(file);
> -
> -       fdput(real);
> -
>         return ret;
>  }
>
> @@ -275,16 +312,32 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
>
>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
>         file_start_write(real.file);
> -       ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
> -                            ovl_iocb_to_rwf(iocb));
> -       file_end_write(real.file);
> +       if (is_sync_kiocb(iocb)) {
> +               ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
> +                                    ovl_iocb_to_rwf(iocb));
> +               file_end_write(real.file);
> +               /* Update size */
> +               ovl_copyattr(ovl_inode_real(inode), inode);
> +               fdput(real);
> +       } else {
> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
> +                                                              GFP_NOFS);
> +               aio_req->fd = real;
> +               aio_req->orig_iocb = iocb;
> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
> +               ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
> +               /* Update size */
> +               ovl_copyattr(ovl_inode_real(inode), inode);

That should be in completion/error

Thanks,
Amir.


* Re: [PATCH 2/2] ovl: implement async IO routines
  2019-11-19  4:22   ` Amir Goldstein
@ 2019-11-19  8:37     ` Jiufei Xue
  2019-11-19  9:38       ` Amir Goldstein
  0 siblings, 1 reply; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19  8:37 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel

Hi Amir,

On 2019/11/19 12:22 PM, Amir Goldstein wrote:
> On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
>>
>> A performance regression has been observed since Linux v4.19 when running
>> aio tests with fio at iodepth 128 on overlayfs: the queue depth of the
>> device stays at 1, which is unexpected.
>>
>> After investigation, we found that commit 16914e6fc7
>> ("ovl: add ovl_read_iter()") and commit 2a92e07edc
>> ("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
>> requests to the real filesystem. Async IOs are converted to sync IOs
>> there, causing the performance regression.
>>
>> So implement async IO for stacked reading and writing.
>>
>> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
>> ---
>>  fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
>>  fs/overlayfs/overlayfs.h |  2 +
>>  fs/overlayfs/super.c     | 12 +++++-
>>  3 files changed, 95 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> index e235a63..07d94e7 100644
>> --- a/fs/overlayfs/file.c
>> +++ b/fs/overlayfs/file.c
>> @@ -11,6 +11,14 @@
>>  #include <linux/uaccess.h>
>>  #include "overlayfs.h"
>>
>> +struct ovl_aio_req {
>> +       struct kiocb iocb;
>> +       struct kiocb *orig_iocb;
>> +       struct fd fd;
>> +};
>> +
>> +static struct kmem_cache *ovl_aio_request_cachep;
>> +
>>  static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>>  {
>>         if (realinode != ovl_inode_upper(inode))
>> @@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
>>         return flags;
>>  }
>>
>> +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
>> +{
>> +       struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
>> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
>> +
>> +       if (iocb->ki_flags & IOCB_WRITE)
>> +               file_end_write(iocb->ki_filp);
>> +
>> +       orig_iocb->ki_pos = iocb->ki_pos;
>> +       orig_iocb->ki_complete(orig_iocb, res, res2);
>> +
>> +       fdput(aio_req->fd);
>> +       kmem_cache_free(ovl_aio_request_cachep, aio_req);
>> +}
>> +
>>  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>>  {
>>         struct file *file = iocb->ki_filp;
>> @@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>>                 return ret;
>>
>>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
>> -       ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
>> -                           ovl_iocb_to_rwf(iocb));
>> +       if (is_sync_kiocb(iocb)) {
>> +               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
>> +                                   ovl_iocb_to_rwf(iocb));
>> +               ovl_file_accessed(file);
>> +               fdput(real);
>> +       } else {
>> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
>> +                                                              GFP_NOFS);
>> +               aio_req->fd = real;
>> +               aio_req->orig_iocb = iocb;
>> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
>> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
>> +               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
>> +               ovl_file_accessed(file);
> 
> That should be done in completion/error
>

Refer to generic_file_read_iter(): in the direct IO path, file_accessed()
is done before IO submission, so I think ovl_file_accessed() should be done
here regardless of whether the IO completes, fails, or is queued.

>> +               if (ret != -EIOCBQUEUED) {
>> +                       iocb->ki_pos = aio_req->iocb.ki_pos;
>> +                       fdput(real);
>> +                       kmem_cache_free(ovl_aio_request_cachep, aio_req);
>> +               }
> 
> Suggest cleanup helper for completion/error
> 
Yes, that will be clearer. I will add a cleanup helper in version 2.
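
Something along these lines, maybe (a rough sketch only; the name and exact
contents may change in v2):

static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
	struct kiocb *iocb = &aio_req->iocb;
	struct kiocb *orig_iocb = aio_req->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE) {
		struct inode *inode = file_inode(orig_iocb->ki_filp);

		/* Update size/times on the overlay inode after the write */
		ovl_copyattr(ovl_inode_real(inode), inode);
		file_end_write(iocb->ki_filp);
	}

	orig_iocb->ki_pos = iocb->ki_pos;
	fdput(aio_req->fd);
	kmem_cache_free(ovl_aio_request_cachep, aio_req);
}

Then both ovl_aio_rw_complete() and the !EIOCBQUEUED path in the submitters
can just call it.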

> 
>> +       }
>>         revert_creds(old_cred);
>>
>> -       ovl_file_accessed(file);
>> -
>> -       fdput(real);
>> -
>>         return ret;
>>  }
>>
>> @@ -275,16 +312,32 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
>>
>>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
>>         file_start_write(real.file);
>> -       ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
>> -                            ovl_iocb_to_rwf(iocb));
>> -       file_end_write(real.file);
>> +       if (is_sync_kiocb(iocb)) {
>> +               ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
>> +                                    ovl_iocb_to_rwf(iocb));
>> +               file_end_write(real.file);
>> +               /* Update size */
>> +               ovl_copyattr(ovl_inode_real(inode), inode);
>> +               fdput(real);
>> +       } else {
>> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
>> +                                                              GFP_NOFS);
>> +               aio_req->fd = real;
>> +               aio_req->orig_iocb = iocb;
>> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
>> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
>> +               ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
>> +               /* Update size */
>> +               ovl_copyattr(ovl_inode_real(inode), inode);
> 
> That should be in completion/error
> 
Yes, I will move it to the newly added cleanup helper.

Thanks,
Jiufei

> Thanks,
> Amir.
> 


* Re: [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions
  2019-11-19  3:14   ` Amir Goldstein
@ 2019-11-19  8:40     ` Jiufei Xue
  0 siblings, 0 replies; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19  8:40 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel


Hi Amir,
On 2019/11/19 11:14 AM, Amir Goldstein wrote:
> On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
>>
>> This doesn't cause any behavior changes and will be used by the overlayfs
>> async IO implementation.
>>
>> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
>> ---
>>  fs/read_write.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/linux/fs.h | 16 +++++++++++++++
>>  2 files changed, 74 insertions(+)
>>
>> diff --git a/fs/read_write.c b/fs/read_write.c
>> index 5bbf587..3dfbcec 100644
>> --- a/fs/read_write.c
>> +++ b/fs/read_write.c
>> @@ -984,6 +984,64 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
>>  }
>>  EXPORT_SYMBOL(vfs_iter_write);
>>
>> +ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
>> +                          struct iov_iter *iter)
>> +{
>> +       ssize_t ret = 0;
>> +       ssize_t tot_len;
>> +
>> +       if (!file->f_op->read_iter)
>> +               return -EINVAL;
>> +       if (!(file->f_mode & FMODE_READ))
>> +               return -EBADF;
>> +       if (!(file->f_mode & FMODE_CAN_READ))
>> +               return -EINVAL;
>> +
>> +       tot_len = iov_iter_count(iter);
>> +       if (!tot_len)
>> +               return 0;
>> +
>> +       ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       ret = call_read_iter(file, iocb, iter);
>> +       if (ret >= 0)
>> +               fsnotify_access(file);
>> +
>> +       return ret;
>> +}
>> +EXPORT_SYMBOL(vfs_iocb_iter_read);
>> +
>> +ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
>> +                           struct iov_iter *iter)
>> +{
>> +       ssize_t ret = 0;
>> +       ssize_t tot_len;
>> +
>> +       if (!file->f_op->write_iter)
>> +               return -EINVAL;
>> +       if (!(file->f_mode & FMODE_WRITE))
>> +               return -EBADF;
>> +       if (!(file->f_mode & FMODE_CAN_WRITE))
>> +               return -EINVAL;
>> +
>> +       tot_len = iov_iter_count(iter);
>> +       if (!tot_len)
>> +               return 0;
>> +
>> +       ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       ret = call_write_iter(file, iocb, iter);
>> +       if (ret >= 0)
>> +               fsnotify_modify(file);
>> +
>> +       return ret;
>> +}
>> +EXPORT_SYMBOL(vfs_iocb_iter_write);
>> +
> 
> If it was up to me, I would pass down an optional iocb pointer
> to the do_iter_XXX static helpers, instead of duplicating the code.
> Others may find your approach cleaner, so let's see what other
> people think.
>

Thanks for your review. I have considered your suggestion and still
think that adding the new helpers is cleaner.

Let's wait for other people's opinions.

Thanks,
Jiufei

> Thanks,
> Amir.
> 


* Re: [PATCH 2/2] ovl: implement async IO routines
  2019-11-19  8:37     ` Jiufei Xue
@ 2019-11-19  9:38       ` Amir Goldstein
  2019-11-19 11:22         ` Jiufei Xue
  0 siblings, 1 reply; 11+ messages in thread
From: Amir Goldstein @ 2019-11-19  9:38 UTC (permalink / raw)
  To: Jiufei Xue; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel

On Tue, Nov 19, 2019 at 10:37 AM Jiufei Xue
<jiufei.xue@linux.alibaba.com> wrote:
>
> Hi Amir,
>
> On 2019/11/19 12:22 PM, Amir Goldstein wrote:
> > On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
> >>
> >> A performance regression has been observed since Linux v4.19 when running
> >> aio tests with fio at iodepth 128 on overlayfs: the queue depth of the
> >> device stays at 1, which is unexpected.
> >>
> >> After investigation, we found that commit 16914e6fc7
> >> ("ovl: add ovl_read_iter()") and commit 2a92e07edc
> >> ("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
> >> requests to the real filesystem. Async IOs are converted to sync IOs
> >> there, causing the performance regression.
> >>
> >> So implement async IO for stacked reading and writing.
> >>
> >> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
> >> ---
> >>  fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
> >>  fs/overlayfs/overlayfs.h |  2 +
> >>  fs/overlayfs/super.c     | 12 +++++-
> >>  3 files changed, 95 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index e235a63..07d94e7 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -11,6 +11,14 @@
> >>  #include <linux/uaccess.h>
> >>  #include "overlayfs.h"
> >>
> >> +struct ovl_aio_req {
> >> +       struct kiocb iocb;
> >> +       struct kiocb *orig_iocb;
> >> +       struct fd fd;
> >> +};
> >> +
> >> +static struct kmem_cache *ovl_aio_request_cachep;
> >> +
> >>  static char ovl_whatisit(struct inode *inode, struct inode *realinode)
> >>  {
> >>         if (realinode != ovl_inode_upper(inode))
> >> @@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
> >>         return flags;
> >>  }
> >>
> >> +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
> >> +{
> >> +       struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
> >> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
> >> +
> >> +       if (iocb->ki_flags & IOCB_WRITE)
> >> +               file_end_write(iocb->ki_filp);
> >> +
> >> +       orig_iocb->ki_pos = iocb->ki_pos;
> >> +       orig_iocb->ki_complete(orig_iocb, res, res2);
> >> +
> >> +       fdput(aio_req->fd);
> >> +       kmem_cache_free(ovl_aio_request_cachep, aio_req);
> >> +}
> >> +
> >>  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> >>  {
> >>         struct file *file = iocb->ki_filp;
> >> @@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> >>                 return ret;
> >>
> >>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
> >> -       ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
> >> -                           ovl_iocb_to_rwf(iocb));
> >> +       if (is_sync_kiocb(iocb)) {
> >> +               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
> >> +                                   ovl_iocb_to_rwf(iocb));
> >> +               ovl_file_accessed(file);
> >> +               fdput(real);
> >> +       } else {
> >> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
> >> +                                                              GFP_NOFS);
> >> +               aio_req->fd = real;
> >> +               aio_req->orig_iocb = iocb;
> >> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
> >> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
> >> +               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
> >> +               ovl_file_accessed(file);
> >
> > That should be done in completion/error
> >
>
> Refer to generic_file_read_iter(): in the direct IO path, file_accessed()
> is done before IO submission, so I think ovl_file_accessed() should be done
> here regardless of whether the IO completes, fails, or is queued.

Mmm, it doesn't matter much if atime is updated before or after,
but ovl_file_accessed() does not only update atime, it also copies
ctime which could have been modified as a result of the io, so
I think it is safer to put it in the cleanup hook.
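
For reference, ovl_file_accessed() looks roughly like this (paraphrased from
memory, not a verbatim copy):

static void ovl_file_accessed(struct file *file)
{
	struct inode *inode, *upperinode;

	if (file->f_flags & O_NOATIME)
		return;

	inode = file_inode(file);
	upperinode = ovl_inode_upper(inode);

	if (!upperinode)
		return;

	if (!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
	    !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime)) {
		/* pull mtime/ctime up from the real inode */
		inode->i_mtime = upperinode->i_mtime;
		inode->i_ctime = upperinode->i_ctime;
	}

	touch_atime(&file->f_path);
}

So besides atime it also syncs the overlay ctime/mtime from the real inode,
which is why I would rather see it run after the IO has actually happened.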

Thanks,
Amir.


* Re: [PATCH 2/2] ovl: implement async IO routines
  2019-11-19  9:38       ` Amir Goldstein
@ 2019-11-19 11:22         ` Jiufei Xue
  2019-11-19 12:50           ` Amir Goldstein
  0 siblings, 1 reply; 11+ messages in thread
From: Jiufei Xue @ 2019-11-19 11:22 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel


Hi Amir,
On 2019/11/19 5:38 PM, Amir Goldstein wrote:
> On Tue, Nov 19, 2019 at 10:37 AM Jiufei Xue
> <jiufei.xue@linux.alibaba.com> wrote:
>>
>> Hi Amir,
>>
>> On 2019/11/19 12:22 PM, Amir Goldstein wrote:
>>> On Tue, Nov 19, 2019 at 4:14 AM Jiufei Xue <jiufei.xue@linux.alibaba.com> wrote:
>>>>
>>>> A performance regression has been observed since Linux v4.19 when running
>>>> aio tests with fio at iodepth 128 on overlayfs: the queue depth of the
>>>> device stays at 1, which is unexpected.
>>>>
>>>> After investigation, we found that commit 16914e6fc7
>>>> ("ovl: add ovl_read_iter()") and commit 2a92e07edc
>>>> ("ovl: add ovl_write_iter()") use do_iter_readv_writev() to submit
>>>> requests to the real filesystem. Async IOs are converted to sync IOs
>>>> there, causing the performance regression.
>>>>
>>>> So implement async IO for stacked reading and writing.
>>>>
>>>> Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
>>>> ---
>>>>  fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
>>>>  fs/overlayfs/overlayfs.h |  2 +
>>>>  fs/overlayfs/super.c     | 12 +++++-
>>>>  3 files changed, 95 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>>>> index e235a63..07d94e7 100644
>>>> --- a/fs/overlayfs/file.c
>>>> +++ b/fs/overlayfs/file.c
>>>> @@ -11,6 +11,14 @@
>>>>  #include <linux/uaccess.h>
>>>>  #include "overlayfs.h"
>>>>
>>>> +struct ovl_aio_req {
>>>> +       struct kiocb iocb;
>>>> +       struct kiocb *orig_iocb;
>>>> +       struct fd fd;
>>>> +};
>>>> +
>>>> +static struct kmem_cache *ovl_aio_request_cachep;
>>>> +
>>>>  static char ovl_whatisit(struct inode *inode, struct inode *realinode)
>>>>  {
>>>>         if (realinode != ovl_inode_upper(inode))
>>>> @@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
>>>>         return flags;
>>>>  }
>>>>
>>>> +static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
>>>> +{
>>>> +       struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
>>>> +       struct kiocb *orig_iocb = aio_req->orig_iocb;
>>>> +
>>>> +       if (iocb->ki_flags & IOCB_WRITE)
>>>> +               file_end_write(iocb->ki_filp);
>>>> +
>>>> +       orig_iocb->ki_pos = iocb->ki_pos;
>>>> +       orig_iocb->ki_complete(orig_iocb, res, res2);
>>>> +
>>>> +       fdput(aio_req->fd);
>>>> +       kmem_cache_free(ovl_aio_request_cachep, aio_req);
>>>> +}
>>>> +
>>>>  static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>>>>  {
>>>>         struct file *file = iocb->ki_filp;
>>>> @@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>>>>                 return ret;
>>>>
>>>>         old_cred = ovl_override_creds(file_inode(file)->i_sb);
>>>> -       ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
>>>> -                           ovl_iocb_to_rwf(iocb));
>>>> +       if (is_sync_kiocb(iocb)) {
>>>> +               ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
>>>> +                                   ovl_iocb_to_rwf(iocb));
>>>> +               ovl_file_accessed(file);
>>>> +               fdput(real);
>>>> +       } else {
>>>> +               struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
>>>> +                                                              GFP_NOFS);
>>>> +               aio_req->fd = real;
>>>> +               aio_req->orig_iocb = iocb;
>>>> +               kiocb_clone(&aio_req->iocb, iocb, real.file);
>>>> +               aio_req->iocb.ki_complete = ovl_aio_rw_complete;
>>>> +               ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
>>>> +               ovl_file_accessed(file);
>>>
>>> That should be done in completion/error
>>>
>>
>> Refer to generic_file_read_iter(): in the direct IO path, file_accessed()
>> is done before IO submission, so I think ovl_file_accessed() should be done
>> here regardless of whether the IO completes, fails, or is queued.
> 
> Mmm, it doesn't matter much if atime is updated before or after,
> but ovl_file_accessed() does not only update atime, it also copies
> ctime which could have been modified as a result of the io, so
> I think it is safer to put it in the cleanup hook.
>

Can you explain in more detail how a read op could modify ctime as a
result of the IO?

I found that calling ovl_file_accessed() on the async IO completion path
triggers BUG_ON(irqs_disabled()). The call trace is pasted below:

ovl_file_accessed
  -> touch_atime
    -> ovl_update_time
      -> generic_update_time
        -> __mark_inode_dirty
          -> ext4_dirty_inode
            -> __ext4_get_inode_loc
              -> __find_get_block
                -> lookup_bh_lru
                   -> check_irqs_on

So I need more detail to figure out how to fix this issue.

Thanks,
Jiufei.

> Thanks,
> Amir.
> 


* Re: [PATCH 2/2] ovl: implement async IO routines
  2019-11-19 11:22         ` Jiufei Xue
@ 2019-11-19 12:50           ` Amir Goldstein
  0 siblings, 0 replies; 11+ messages in thread
From: Amir Goldstein @ 2019-11-19 12:50 UTC (permalink / raw)
  To: Jiufei Xue; +Cc: Miklos Szeredi, overlayfs, linux-fsdevel

> >>
> >> Refer to generic_file_read_iter(): in the direct IO path, file_accessed()
> >> is done before IO submission, so I think ovl_file_accessed() should be done
> >> here regardless of whether the IO completes, fails, or is queued.
> >
> > Mmm, it doesn't matter much if atime is updated before or after,
> > but ovl_file_accessed() does not only update atime, it also copies
> > ctime which could have been modified as a result of the io, so
> > I think it is safer to put it in the cleanup hook.
> >
>
> Can you explain in more detail how a read op could modify ctime as a
> result of the IO?
>
> I found that calling ovl_file_accessed() on the async IO completion path
> triggers BUG_ON(irqs_disabled()). The call trace is pasted below:
>
> ovl_file_accessed
>   -> touch_atime
>     -> ovl_update_time
>       -> generic_update_time
>         -> __mark_inode_dirty
>           -> ext4_dirty_inode
>             -> __ext4_get_inode_loc
>               -> __find_get_block
>                 -> lookup_bh_lru
>                    -> check_irqs_on
>
> So I need more detail to figure out how to fix this issue.
>

It's not important. Please ignore. I didn't know there was an issue
with placing touch_atime() in completion context.

Thanks,
Amir.


* [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions
  2019-11-20  9:45 [PATCH V2 0/2] " Jiufei Xue
@ 2019-11-20  9:45 ` Jiufei Xue
  0 siblings, 0 replies; 11+ messages in thread
From: Jiufei Xue @ 2019-11-20  9:45 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, linux-fsdevel

This doesn't cause any behavior changes and will be used by the overlayfs
async IO implementation.

Signed-off-by: Jiufei Xue <jiufei.xue@linux.alibaba.com>
---
 fs/read_write.c    | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h | 16 +++++++++++++++
 2 files changed, 74 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5bbf587..3dfbcec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -984,6 +984,64 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 }
 EXPORT_SYMBOL(vfs_iter_write);
 
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+			   struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	ssize_t tot_len;
+
+	if (!file->f_op->read_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_READ))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_READ))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		return 0;
+
+	ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_read_iter(file, iocb, iter);
+	if (ret >= 0)
+		fsnotify_access(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_read);
+
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+			    struct iov_iter *iter)
+{
+	ssize_t ret = 0;
+	ssize_t tot_len;
+
+	if (!file->f_op->write_iter)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EBADF;
+	if (!(file->f_mode & FMODE_CAN_WRITE))
+		return -EINVAL;
+
+	tot_len = iov_iter_count(iter);
+	if (!tot_len)
+		return 0;
+
+	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
+	if (ret < 0)
+		return ret;
+
+	ret = call_write_iter(file, iocb, iter);
+	if (ret >= 0)
+		fsnotify_modify(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(vfs_iocb_iter_write);
+
 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		  unsigned long vlen, loff_t *pos, rwf_t flags)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0d909d..c885279 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2071,6 +2071,18 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 	};
 }
 
+static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
+			       struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+		.ki_flags = kiocb_src->ki_flags,
+		.ki_hint = ki_hint_validate(file_write_hint(filp)),
+		.ki_ioprio = kiocb_src->ki_ioprio,
+		.ki_pos = kiocb_src->ki_pos,
+	};
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
@@ -3105,6 +3117,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
+ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
+			   struct iov_iter *iter);
+ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
+			    struct iov_iter *iter);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to);
-- 
1.8.3.1


Thread overview: 11+ messages
2019-11-19  2:14 [PATCH 0/2] ovl: implement async IO routines Jiufei Xue
2019-11-19  2:14 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
2019-11-19  3:14   ` Amir Goldstein
2019-11-19  8:40     ` Jiufei Xue
2019-11-19  2:14 ` [PATCH 2/2] ovl: implement async IO routines Jiufei Xue
2019-11-19  4:22   ` Amir Goldstein
2019-11-19  8:37     ` Jiufei Xue
2019-11-19  9:38       ` Amir Goldstein
2019-11-19 11:22         ` Jiufei Xue
2019-11-19 12:50           ` Amir Goldstein
2019-11-20  9:45 [PATCH V2 0/2] " Jiufei Xue
2019-11-20  9:45 ` [PATCH 1/2] vfs: add vfs_iocb_iter_[read|write] helper functions Jiufei Xue
