linux-unionfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] ovl: stacked mmap for shared map
@ 2020-08-29  9:50 Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:50 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Currently, there is still ro/rw inconsistency related to shared mmap
in overlayfs, this patch set implements stacked mmap for shared map
and transfer necessary operations to upper inode, so that we can keep
data consistency in any kind of mmap. 

Patch 1 exports necessary functions from kernel to module.
Patch 2 introduces struct ovl_file_entry to store real vm_ops.
Patch 3 implements stacked mmap for shared map to keep data
consistency.

Chengguang Xu (3):
  mm: mmap: export necessary functions for overlayfs' mmap
  ovl: introduce struct ovl_file_entry
  ovl: implement stacked mmap for shared map

 fs/overlayfs/file.c | 178 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/mm.h  |   2 +
 mm/filemap.c        |  28 +++++++
 mm/internal.h       |  22 ------
 4 files changed, 195 insertions(+), 35 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
@ 2020-08-29  9:50 ` Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
  2 siblings, 0 replies; 9+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:50 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

In shared mode mmap, if overlayfs' inode does not have data
in upper layer, it should call maybe_unlock_mmap_for_io()
to release lock and waiting for IO in ->fault handler.
Meanwhile, in order to avoid endless retry we should also
check flag FAULT_FLAG_TRIED carefully in ->fault handler.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 include/linux/mm.h |  2 ++
 mm/filemap.c       | 28 ++++++++++++++++++++++++++++
 mm/internal.h      | 22 ----------------------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..214b23734eed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1656,6 +1656,8 @@ void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin);
+bool fault_flag_check(struct vm_fault *vmf, unsigned int flag);
 #else
 static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
diff --git a/mm/filemap.c b/mm/filemap.c
index f0ae9a6308cb..8a226f8ca262 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2704,6 +2704,34 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 	return generic_file_mmap(file, vma);
 }
+
+struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+				      struct file *fpin)
+{
+	int flags = vmf->flags;
+
+	if (fpin)
+		return fpin;
+
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_lock if only
+	 * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
+	 */
+	if (fault_flag_allow_retry_first(flags) &&
+	    !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+		fpin = get_file(vmf->vma->vm_file);
+		mmap_read_unlock(vmf->vma->vm_mm);
+	}
+	return fpin;
+}
+EXPORT_SYMBOL(maybe_unlock_mmap_for_io);
+
+bool fault_flag_check(struct vm_fault *vmf, unsigned int flag)
+{
+	return vmf->flags & flag;
+}
+EXPORT_SYMBOL(fault_flag_check);
 #else
 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
diff --git a/mm/internal.h b/mm/internal.h
index 9886db20d94f..ef19235c6bf1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -402,28 +402,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 
 	return max(start, vma->vm_start);
 }
-
-static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
-						    struct file *fpin)
-{
-	int flags = vmf->flags;
-
-	if (fpin)
-		return fpin;
-
-	/*
-	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
-	 * anything, so we only pin the file and drop the mmap_lock if only
-	 * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
-	 */
-	if (fault_flag_allow_retry_first(flags) &&
-	    !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
-		fpin = get_file(vmf->vma->vm_file);
-		mmap_read_unlock(vmf->vma->vm_mm);
-	}
-	return fpin;
-}
-
 #else /* !CONFIG_MMU */
 static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
@ 2020-08-29  9:51 ` Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
  2 siblings, 0 replies; 9+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:51 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Introduce new struct ovl_file_entry to store real file
and real vm_ops handler.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/file.c | 64 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0d940e29d62b..14ab5344a918 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,6 +21,39 @@ struct ovl_aio_req {
 	struct fd fd;
 };
 
+struct ovl_file_entry {
+	struct file *realfile;
+	void *vm_ops;
+};
+
+struct file *ovl_get_realfile(struct file *file)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	return ofe->realfile;
+}
+
+void ovl_set_realfile(struct file *file, struct file *realfile)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	ofe->realfile = realfile;
+}
+
+void *ovl_get_real_vmops(struct file *file)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	return ofe->vm_ops;
+}
+
+void ovl_set_real_vmops(struct file *file, void *vm_ops)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	ofe->vm_ops = vm_ops;
+}
+
 static struct kmem_cache *ovl_aio_request_cachep;
 
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
@@ -105,14 +138,14 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
 	return 0;
 }
 
-static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+static int ovl_real_fdget_meta(struct file *file, struct fd *real,
 			       bool allow_meta)
 {
 	struct inode *inode = file_inode(file);
 	struct inode *realinode;
 
 	real->flags = 0;
-	real->file = file->private_data;
+	real->file = ovl_get_realfile(file);
 
 	if (allow_meta)
 		realinode = ovl_inode_real(inode);
@@ -134,7 +167,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 	return 0;
 }
 
-static int ovl_real_fdget(const struct file *file, struct fd *real)
+static int ovl_real_fdget(struct file *file, struct fd *real)
 {
 	return ovl_real_fdget_meta(file, real, false);
 }
@@ -144,25 +177,36 @@ static int ovl_open(struct inode *inode, struct file *file)
 	struct file *realfile;
 	int err;
 
+	file->private_data = kzalloc(sizeof(struct ovl_file_entry), GFP_KERNEL);
+	if (!file->private_data)
+		return -ENOMEM;
+
 	err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
 	if (err)
-		return err;
+		goto out;
 
 	/* No longer need these flags, so don't pass them on to underlying fs */
 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
-	if (IS_ERR(realfile))
-		return PTR_ERR(realfile);
-
-	file->private_data = realfile;
+	if (IS_ERR(realfile)) {
+		err = PTR_ERR(realfile);
+		goto out;
+	}
 
+	ovl_set_realfile(file, realfile);
 	return 0;
+out:
+	kfree(file->private_data);
+	file->private_data = NULL;
+	return err;
 }
 
 static int ovl_release(struct inode *inode, struct file *file)
 {
-	fput(file->private_data);
+	fput(ovl_get_realfile(file));
+	kfree(file->private_data);
+	file->private_data = NULL;
 
 	return 0;
 }
@@ -451,7 +495,7 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct file *realfile = file->private_data;
+	struct file *realfile = ovl_get_realfile(file);
 	const struct cred *old_cred;
 	int ret;
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
@ 2020-08-29  9:51 ` Chengguang Xu
  2020-08-30 11:33   ` Amir Goldstein
  2 siblings, 1 reply; 9+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:51 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Implement stacked mmap for shared map to keep data
consistency.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 114 insertions(+), 6 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 14ab5344a918..db5ab200d984 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,9 +21,17 @@ struct ovl_aio_req {
 	struct fd fd;
 };
 
+static vm_fault_t ovl_fault(struct vm_fault *vmf);
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
+
+static const struct vm_operations_struct ovl_vm_ops = {
+	.fault		= ovl_fault,
+	.page_mkwrite	= ovl_page_mkwrite,
+};
+
 struct ovl_file_entry {
 	struct file *realfile;
-	void *vm_ops;
+	const struct vm_operations_struct *vm_ops;
 };
 
 struct file *ovl_get_realfile(struct file *file)
@@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
 	ofe->realfile = realfile;
 }
 
-void *ovl_get_real_vmops(struct file *file)
+const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	return ofe->vm_ops;
 }
 
-void ovl_set_real_vmops(struct file *file, void *vm_ops)
+void ovl_set_real_vmops(struct file *file,
+			const struct vm_operations_struct *vm_ops)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
@@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+vm_fault_t ovl_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct file *fpin, *tmp;
+	struct inode *inode = file_inode(file);
+	struct inode *realinode;
+	const struct cred *old_cred;
+	bool retry_allowed;
+	vm_fault_t ret;
+	int err = 0;
+
+	if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
+		realfile = ovl_get_realfile(file);
+
+		if (!ovl_has_upperdata(inode) ||
+		    realfile->f_inode != ovl_inode_upper(inode) ||
+		    !realfile->f_op->mmap)
+			return VM_FAULT_SIGBUS;
+
+		if (!ovl_get_real_vmops(file)) {
+			old_cred = ovl_override_creds(inode->i_sb);
+			err = call_mmap(realfile, vma);
+			revert_creds(old_cred);
+
+			vma->vm_file = file;
+			if (err) {
+				vma->vm_ops = &ovl_vm_ops;
+				return VM_FAULT_SIGBUS;
+			}
+			ovl_set_real_vmops(file, vma->vm_ops);
+			vma->vm_ops = &ovl_vm_ops;
+		}
+
+		retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
+		if (retry_allowed)
+			vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		vma->vm_file = realfile;
+		ret = ovl_get_real_vmops(file)->fault(vmf);
+		vma->vm_file = file;
+		if (retry_allowed)
+			vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
+		return ret;
+
+	} else {
+		fpin = maybe_unlock_mmap_for_io(vmf, NULL);
+		if (!fpin)
+			return VM_FAULT_SIGBUS;
+
+		ret = VM_FAULT_RETRY;
+		if (!ovl_has_upperdata(inode)) {
+			err = ovl_copy_up_with_data(file->f_path.dentry);
+			if (err)
+				goto out;
+		}
+
+		realinode = ovl_inode_realdata(inode);
+		realfile = ovl_open_realfile(file, realinode);
+		if (IS_ERR(realfile))
+			goto out;
+
+		tmp = ovl_get_realfile(file);
+		ovl_set_realfile(file, realfile);
+		fput(tmp);
+
+out:
+		fput(fpin);
+		return ret;
+	}
+}
+
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct inode *inode = file_inode(file);
+	vm_fault_t ret;
+
+	realfile = ovl_get_realfile(file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+
+	vma->vm_file = realfile;
+	ret = ovl_get_real_vmops(file)->page_mkwrite(vmf);
+	vma->vm_file = file;
+
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = ovl_get_realfile(file);
 	const struct cred *old_cred;
-	int ret;
+	int ret = 0;
 
 	if (!realfile->f_op->mmap)
 		return -ENODEV;
@@ -505,6 +607,13 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	if (WARN_ON(file != vma->vm_file))
 		return -EIO;
 
+	if (!ovl_has_upperdata(file_inode(file)) &&
+	    (vma->vm_flags & (VM_SHARED|VM_MAYSHARE))) {
+		vma->vm_ops = &ovl_vm_ops;
+		ovl_file_accessed(file);
+		return 0;
+	}
+
 	vma->vm_file = get_file(realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
@@ -517,10 +626,9 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	} else {
 		/* Drop reference count from previous vm_file value */
 		fput(file);
+		ovl_file_accessed(file);
 	}
 
-	ovl_file_accessed(file);
-
 	return ret;
 }
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
@ 2020-08-30 11:33   ` Amir Goldstein
  2020-08-31 13:47     ` cgxu
  0 siblings, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2020-08-30 11:33 UTC (permalink / raw)
  To: Chengguang Xu
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Implement stacked mmap for shared map to keep data
> consistency.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
>  fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 14ab5344a918..db5ab200d984 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>         struct fd fd;
>  };
>
> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> +
> +static const struct vm_operations_struct ovl_vm_ops = {
> +       .fault          = ovl_fault,
> +       .page_mkwrite   = ovl_page_mkwrite,
> +};
> +

Interesting direction, not sure if this is workable.
I don't know enough about mm to say.

But what about the rest of the operations?
Did you go over them and decide that overlay doesn't need to implement them?
I doubt it, but if you did, please document that.

>  struct ovl_file_entry {
>         struct file *realfile;
> -       void *vm_ops;
> +       const struct vm_operations_struct *vm_ops;
>  };
>
>  struct file *ovl_get_realfile(struct file *file)
> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>         ofe->realfile = realfile;
>  }
>
> -void *ovl_get_real_vmops(struct file *file)
> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
>         return ofe->vm_ops;
>  }
>
> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> +void ovl_set_real_vmops(struct file *file,
> +                       const struct vm_operations_struct *vm_ops)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>         return ret;
>  }
>
> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> +{
> +       struct vm_area_struct *vma = vmf->vma;
> +       struct file *file = vma->vm_file;
> +       struct file *realfile;
> +       struct file *fpin, *tmp;
> +       struct inode *inode = file_inode(file);
> +       struct inode *realinode;
> +       const struct cred *old_cred;
> +       bool retry_allowed;
> +       vm_fault_t ret;
> +       int err = 0;
> +
> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> +               realfile = ovl_get_realfile(file);
> +
> +               if (!ovl_has_upperdata(inode) ||
> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> +                   !realfile->f_op->mmap)
> +                       return VM_FAULT_SIGBUS;
> +
> +               if (!ovl_get_real_vmops(file)) {
> +                       old_cred = ovl_override_creds(inode->i_sb);
> +                       err = call_mmap(realfile, vma);
> +                       revert_creds(old_cred);
> +
> +                       vma->vm_file = file;
> +                       if (err) {
> +                               vma->vm_ops = &ovl_vm_ops;
> +                               return VM_FAULT_SIGBUS;
> +                       }
> +                       ovl_set_real_vmops(file, vma->vm_ops);
> +                       vma->vm_ops = &ovl_vm_ops;
> +               }
> +
> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> +               if (retry_allowed)
> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> +               vma->vm_file = realfile;
> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> +               vma->vm_file = file;
> +               if (retry_allowed)
> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> +               return ret;
> +
> +       } else {
> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> +               if (!fpin)
> +                       return VM_FAULT_SIGBUS;
> +
> +               ret = VM_FAULT_RETRY;
> +               if (!ovl_has_upperdata(inode)) {
> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> +                       if (err)
> +                               goto out;
> +               }
> +
> +               realinode = ovl_inode_realdata(inode);
> +               realfile = ovl_open_realfile(file, realinode);
> +               if (IS_ERR(realfile))
> +                       goto out;
> +
> +               tmp = ovl_get_realfile(file);
> +               ovl_set_realfile(file, realfile);
> +               fput(tmp);
> +
> +out:
> +               fput(fpin);
> +               return ret;
> +       }
> +}


Please add some documentation to explain the method used.
Do we need to retry if real_vmops are already set?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-30 11:33   ` Amir Goldstein
@ 2020-08-31 13:47     ` cgxu
  2020-08-31 15:51       ` Amir Goldstein
  0 siblings, 1 reply; 9+ messages in thread
From: cgxu @ 2020-08-31 13:47 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On 8/30/20 7:33 PM, Amir Goldstein wrote:
> On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>>
>> Implement stacked mmap for shared map to keep data
>> consistency.
>>
>> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>> ---
>>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 114 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> index 14ab5344a918..db5ab200d984 100644
>> --- a/fs/overlayfs/file.c
>> +++ b/fs/overlayfs/file.c
>> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>>          struct fd fd;
>>   };
>>
>> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
>> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
>> +
>> +static const struct vm_operations_struct ovl_vm_ops = {
>> +       .fault          = ovl_fault,
>> +       .page_mkwrite   = ovl_page_mkwrite,
>> +};
>> +
> 
> Interesting direction, not sure if this is workable.
> I don't know enough about mm to say.
> 
> But what about the rest of the operations?
> Did you go over them and decide that overlay doesn't need to implement them?
> I doubt it, but if you did, please document that.

I did some check for rest of them, IIUC ->fault will be enough for this 
special case (shared read-only mmap with no upper), I will remove 
->page_mkwrite in v2.

# I do not consider support ->huge_fault in current stage due to many fs 
cannot support DAX properly.

BTW, do you know who should I add to CC list for further deep review of
this code? fadevel-list?



> 
>>   struct ovl_file_entry {
>>          struct file *realfile;
>> -       void *vm_ops;
>> +       const struct vm_operations_struct *vm_ops;
>>   };
>>
>>   struct file *ovl_get_realfile(struct file *file)
>> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>>          ofe->realfile = realfile;
>>   }
>>
>> -void *ovl_get_real_vmops(struct file *file)
>> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>>   {
>>          struct ovl_file_entry *ofe = file->private_data;
>>
>>          return ofe->vm_ops;
>>   }
>>
>> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
>> +void ovl_set_real_vmops(struct file *file,
>> +                       const struct vm_operations_struct *vm_ops)
>>   {
>>          struct ovl_file_entry *ofe = file->private_data;
>>
>> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>>          return ret;
>>   }
>>
>> +vm_fault_t ovl_fault(struct vm_fault *vmf)
>> +{
>> +       struct vm_area_struct *vma = vmf->vma;
>> +       struct file *file = vma->vm_file;
>> +       struct file *realfile;
>> +       struct file *fpin, *tmp;
>> +       struct inode *inode = file_inode(file);
>> +       struct inode *realinode;
>> +       const struct cred *old_cred;
>> +       bool retry_allowed;
>> +       vm_fault_t ret;
>> +       int err = 0;
>> +
>> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
>> +               realfile = ovl_get_realfile(file);
>> +
>> +               if (!ovl_has_upperdata(inode) ||
>> +                   realfile->f_inode != ovl_inode_upper(inode) ||
>> +                   !realfile->f_op->mmap)
>> +                       return VM_FAULT_SIGBUS;
>> +
>> +               if (!ovl_get_real_vmops(file)) {
>> +                       old_cred = ovl_override_creds(inode->i_sb);
>> +                       err = call_mmap(realfile, vma);
>> +                       revert_creds(old_cred);
>> +
>> +                       vma->vm_file = file;
>> +                       if (err) {
>> +                               vma->vm_ops = &ovl_vm_ops;
>> +                               return VM_FAULT_SIGBUS;
>> +                       }
>> +                       ovl_set_real_vmops(file, vma->vm_ops);
>> +                       vma->vm_ops = &ovl_vm_ops;
>> +               }
>> +
>> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
>> +               if (retry_allowed)
>> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
>> +               vma->vm_file = realfile;
>> +               ret = ovl_get_real_vmops(file)->fault(vmf);
>> +               vma->vm_file = file;
>> +               if (retry_allowed)
>> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
>> +               return ret;
>> +
>> +       } else {
>> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
>> +               if (!fpin)
>> +                       return VM_FAULT_SIGBUS;
>> +
>> +               ret = VM_FAULT_RETRY;
>> +               if (!ovl_has_upperdata(inode)) {
>> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
>> +                       if (err)
>> +                               goto out;
>> +               }
>> +
>> +               realinode = ovl_inode_realdata(inode);
>> +               realfile = ovl_open_realfile(file, realinode);
>> +               if (IS_ERR(realfile))
>> +                       goto out;
>> +
>> +               tmp = ovl_get_realfile(file);
>> +               ovl_set_realfile(file, realfile);
>> +               fput(tmp);
>> +
>> +out:
>> +               fput(fpin);
>> +               return ret;
>> +       }
>> +}
> 
> 
> Please add some documentation to explain the method used.
> Do we need to retry if real_vmops are already set?
> 

Good catch, actually retry is not needed in that case.

Basically, we unlock(mmap_lock)->copy-up->open when
detecting no upper inode then retry fault operation.
However, we need to check fault retry flag carefully
for avoiding endless retry.

I'll add more explanation in v2.

---
cgxu





^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 13:47     ` cgxu
@ 2020-08-31 15:51       ` Amir Goldstein
  2020-09-01  7:44         ` Miklos Szeredi
  2020-09-01 13:20         ` cgxu
  0 siblings, 2 replies; 9+ messages in thread
From: Amir Goldstein @ 2020-08-31 15:51 UTC (permalink / raw)
  To: cgxu; +Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
>
> On 8/30/20 7:33 PM, Amir Goldstein wrote:
> > On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
> >>
> >> Implement stacked mmap for shared map to keep data
> >> consistency.
> >>
> >> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> >> ---
> >>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 114 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 14ab5344a918..db5ab200d984 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -21,9 +21,17 @@ struct ovl_aio_req {
> >>          struct fd fd;
> >>   };
> >>
> >> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> >> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> >> +
> >> +static const struct vm_operations_struct ovl_vm_ops = {
> >> +       .fault          = ovl_fault,
> >> +       .page_mkwrite   = ovl_page_mkwrite,
> >> +};
> >> +
> >
> > Interesting direction, not sure if this is workable.
> > I don't know enough about mm to say.
> >
> > But what about the rest of the operations?
> > Did you go over them and decide that overlay doesn't need to implement them?
> > I doubt it, but if you did, please document that.
>
> I did some check for rest of them, IIUC ->fault will be enough for this
> special case (shared read-only mmap with no upper), I will remove
> ->page_mkwrite in v2.

Ok I suppose you checked that ->map_pages is not relevant?

>
> # I do not consider support ->huge_fault in current stage due to many fs
> cannot support DAX properly.
>
> BTW, do you know who should I add to CC list for further deep review of
> this code? fadevel-list?
>

fsdevel would be good, but I would wait for initial feedback from Miklos
before you post v2...

>
>
> >
> >>   struct ovl_file_entry {
> >>          struct file *realfile;
> >> -       void *vm_ops;
> >> +       const struct vm_operations_struct *vm_ops;
> >>   };
> >>
> >>   struct file *ovl_get_realfile(struct file *file)
> >> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
> >>          ofe->realfile = realfile;
> >>   }
> >>
> >> -void *ovl_get_real_vmops(struct file *file)
> >> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >>          return ofe->vm_ops;
> >>   }
> >>
> >> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> >> +void ovl_set_real_vmops(struct file *file,
> >> +                       const struct vm_operations_struct *vm_ops)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>          return ret;
> >>   }
> >>
> >> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> >> +{
> >> +       struct vm_area_struct *vma = vmf->vma;
> >> +       struct file *file = vma->vm_file;
> >> +       struct file *realfile;
> >> +       struct file *fpin, *tmp;
> >> +       struct inode *inode = file_inode(file);
> >> +       struct inode *realinode;
> >> +       const struct cred *old_cred;
> >> +       bool retry_allowed;
> >> +       vm_fault_t ret;
> >> +       int err = 0;
> >> +
> >> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> >> +               realfile = ovl_get_realfile(file);
> >> +
> >> +               if (!ovl_has_upperdata(inode) ||
> >> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> >> +                   !realfile->f_op->mmap)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               if (!ovl_get_real_vmops(file)) {
> >> +                       old_cred = ovl_override_creds(inode->i_sb);
> >> +                       err = call_mmap(realfile, vma);
> >> +                       revert_creds(old_cred);
> >> +
> >> +                       vma->vm_file = file;
> >> +                       if (err) {
> >> +                               vma->vm_ops = &ovl_vm_ops;
> >> +                               return VM_FAULT_SIGBUS;
> >> +                       }
> >> +                       ovl_set_real_vmops(file, vma->vm_ops);
> >> +                       vma->vm_ops = &ovl_vm_ops;
> >> +               }
> >> +
> >> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> >> +               vma->vm_file = realfile;
> >> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> >> +               vma->vm_file = file;
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> >> +               return ret;
> >> +
> >> +       } else {
> >> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> >> +               if (!fpin)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               ret = VM_FAULT_RETRY;
> >> +               if (!ovl_has_upperdata(inode)) {
> >> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> >> +                       if (err)
> >> +                               goto out;
> >> +               }
> >> +
> >> +               realinode = ovl_inode_realdata(inode);
> >> +               realfile = ovl_open_realfile(file, realinode);
> >> +               if (IS_ERR(realfile))
> >> +                       goto out;
> >> +
> >> +               tmp = ovl_get_realfile(file);
> >> +               ovl_set_realfile(file, realfile);
> >> +               fput(tmp);
> >> +
> >> +out:
> >> +               fput(fpin);
> >> +               return ret;
> >> +       }
> >> +}
> >
> >
> > Please add some documentation to explain the method used.
> > Do we need to retry if real_vmops are already set?
> >
>
> Good catch, actually retry is not needed in that case.
>
> Basically, we unlock(mmap_lock)->copy-up->open when
> detecting no upper inode then retry fault operation.
> However, we need to check fault retry flag carefully
> for avoiding endless retry.

That much I got, but the details of setting ->vm_file and vmops
look subtle, so better explain them.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 15:51       ` Amir Goldstein
@ 2020-09-01  7:44         ` Miklos Szeredi
  2020-09-01 13:20         ` cgxu
  1 sibling, 0 replies; 9+ messages in thread
From: Miklos Szeredi @ 2020-09-01  7:44 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: cgxu, overlayfs, Linux MM, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 5:52 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
> >
> > On 8/30/20 7:33 PM, Amir Goldstein wrote:

> > > Interesting direction, not sure if this is workable.
> > > I don't know enough about mm to say.
> > >
> > > But what about the rest of the operations?
> > > Did you go over them and decide that overlay doesn't need to implement them?
> > > I doubt it, but if you did, please document that.
> >
> > I did some check for rest of them, IIUC ->fault will be enough for this
> > special case (shared read-only mmap with no upper), I will remove
> > ->page_mkwrite in v2.

Hmm, so this is just for that specific corner case?  Not a generic
shared mmap implementation?

>
> Ok I suppose you checked that ->map_pages is not relevant?
>
> >
> > # I do not consider support ->huge_fault in current stage due to many fs
> > cannot support DAX properly.
> >
> > BTW, do you know who should I add to CC list for further deep review of
> > this code? fadevel-list?
> >
>
> fsdevel would be good, but I would wait for initial feedback from Miklos
> before you post v2...

I could dig into the details of page fault handling, but that's not an
area that I'm intimately familiar with.    So yeah, a high level
description of what happens on mmap, page fault, write fault, etc...
would be really appreciated.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 15:51       ` Amir Goldstein
  2020-09-01  7:44         ` Miklos Szeredi
@ 2020-09-01 13:20         ` cgxu
  1 sibling, 0 replies; 9+ messages in thread
From: cgxu @ 2020-09-01 13:20 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On 8/31/20 11:51 PM, Amir Goldstein wrote:
> On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
>>
>> On 8/30/20 7:33 PM, Amir Goldstein wrote:
>>> On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>>>>
>>>> Implement stacked mmap for shared map to keep data
>>>> consistency.
>>>>
>>>> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>>>> ---
>>>>    fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>>>>    1 file changed, 114 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>>>> index 14ab5344a918..db5ab200d984 100644
>>>> --- a/fs/overlayfs/file.c
>>>> +++ b/fs/overlayfs/file.c
>>>> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>>>>           struct fd fd;
>>>>    };
>>>>
>>>> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
>>>> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
>>>> +
>>>> +static const struct vm_operations_struct ovl_vm_ops = {
>>>> +       .fault          = ovl_fault,
>>>> +       .page_mkwrite   = ovl_page_mkwrite,
>>>> +};
>>>> +
>>>
>>> Interesting direction, not sure if this is workable.
>>> I don't know enough about mm to say.
>>>
>>> But what about the rest of the operations?
>>> Did you go over them and decide that overlay doesn't need to implement them?
>>> I doubt it, but if you did, please document that.
>>
>> I did some check for rest of them, IIUC ->fault will be enough for this
>> special case (shared read-only mmap with no upper), I will remove
>> ->page_mkwrite in v2.
> 
> Ok I suppose you checked that ->map_pages is not relevant?


->map_pages() does easy maps and fallback to ->fault if the offset is 
not ready, so I think without ->map_pages() it still could work 
properly, we can also implement it for acceleration.


> 
>>
>> # I do not consider support ->huge_fault in current stage due to many fs
>> cannot support DAX properly.
>>
>> BTW, do you know who should I add to CC list for further deep review of
>> this code? fadevel-list?
>>
> 
> fsdevel would be good, but I would wait for initial feedback from Miklos
> before you post v2...
> 
>>
>>
>>>
>>>>    struct ovl_file_entry {
>>>>           struct file *realfile;
>>>> -       void *vm_ops;
>>>> +       const struct vm_operations_struct *vm_ops;
>>>>    };
>>>>
>>>>    struct file *ovl_get_realfile(struct file *file)
>>>> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>>>>           ofe->realfile = realfile;
>>>>    }
>>>>
>>>> -void *ovl_get_real_vmops(struct file *file)
>>>> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>>>>    {
>>>>           struct ovl_file_entry *ofe = file->private_data;
>>>>
>>>>           return ofe->vm_ops;
>>>>    }
>>>>
>>>> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
>>>> +void ovl_set_real_vmops(struct file *file,
>>>> +                       const struct vm_operations_struct *vm_ops)
>>>>    {
>>>>           struct ovl_file_entry *ofe = file->private_data;
>>>>
>>>> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>>>>           return ret;
>>>>    }
>>>>
>>>> +vm_fault_t ovl_fault(struct vm_fault *vmf)
>>>> +{
>>>> +       struct vm_area_struct *vma = vmf->vma;
>>>> +       struct file *file = vma->vm_file;
>>>> +       struct file *realfile;
>>>> +       struct file *fpin, *tmp;
>>>> +       struct inode *inode = file_inode(file);
>>>> +       struct inode *realinode;
>>>> +       const struct cred *old_cred;
>>>> +       bool retry_allowed;
>>>> +       vm_fault_t ret;
>>>> +       int err = 0;
>>>> +
>>>> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
>>>> +               realfile = ovl_get_realfile(file);
>>>> +
>>>> +               if (!ovl_has_upperdata(inode) ||
>>>> +                   realfile->f_inode != ovl_inode_upper(inode) ||
>>>> +                   !realfile->f_op->mmap)
>>>> +                       return VM_FAULT_SIGBUS;
>>>> +
>>>> +               if (!ovl_get_real_vmops(file)) {
>>>> +                       old_cred = ovl_override_creds(inode->i_sb);
>>>> +                       err = call_mmap(realfile, vma);
>>>> +                       revert_creds(old_cred);
>>>> +
>>>> +                       vma->vm_file = file;
>>>> +                       if (err) {
>>>> +                               vma->vm_ops = &ovl_vm_ops;
>>>> +                               return VM_FAULT_SIGBUS;
>>>> +                       }
>>>> +                       ovl_set_real_vmops(file, vma->vm_ops);
>>>> +                       vma->vm_ops = &ovl_vm_ops;
>>>> +               }
>>>> +
>>>> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
>>>> +               if (retry_allowed)
>>>> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
>>>> +               vma->vm_file = realfile;
>>>> +               ret = ovl_get_real_vmops(file)->fault(vmf);
>>>> +               vma->vm_file = file;
>>>> +               if (retry_allowed)
>>>> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
>>>> +               return ret;
>>>> +
>>>> +       } else {
>>>> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
>>>> +               if (!fpin)
>>>> +                       return VM_FAULT_SIGBUS;
>>>> +
>>>> +               ret = VM_FAULT_RETRY;
>>>> +               if (!ovl_has_upperdata(inode)) {
>>>> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
>>>> +                       if (err)
>>>> +                               goto out;
>>>> +               }
>>>> +
>>>> +               realinode = ovl_inode_realdata(inode);
>>>> +               realfile = ovl_open_realfile(file, realinode);
>>>> +               if (IS_ERR(realfile))
>>>> +                       goto out;
>>>> +
>>>> +               tmp = ovl_get_realfile(file);
>>>> +               ovl_set_realfile(file, realfile);
>>>> +               fput(tmp);
>>>> +
>>>> +out:
>>>> +               fput(fpin);
>>>> +               return ret;
>>>> +       }
>>>> +}
>>>
>>>
>>> Please add some documentation to explain the method used.
>>> Do we need to retry if real_vmops are already set?
>>>
>>
>> Good catch, actually retry is not needed in that case.
>>
>> Basically, we unlock(mmap_lock)->copy-up->open when
>> detecting no upper inode then retry fault operation.
>> However, we need to check fault retry flag carefully
>> for avoiding endless retry.
> 
> That much I got, but the details of setting ->vm_file and vmops
> look subtle, so better explain them.
> 

I'll add some explanations in V2, but before that let me write some
comments based on code logic below. If there is still something not 
clear you can point out that again.



+	if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
+		realfile = ovl_get_realfile(file);
+
+		if (!ovl_has_upperdata(inode) ||
+		    realfile->f_inode != ovl_inode_upper(inode) ||
+		    !realfile->f_op->mmap)
+			return VM_FAULT_SIGBUS;

Above condition indicates (copy-up)/(open real-file) failed or
(real-file does not support mmap), so we have to return SIGBUS.



+
+		if (!ovl_get_real_vmops(file)) {
+			old_cred = ovl_override_creds(inode->i_sb);
+			err = call_mmap(realfile, vma);
+			revert_creds(old_cred);
+
+			vma->vm_file = file;
+			if (err) {
+				vma->vm_ops = &ovl_vm_ops;
+				return VM_FAULT_SIGBUS;
+			}
+			ovl_set_real_vmops(file, vma->vm_ops);
+			vma->vm_ops = &ovl_vm_ops;


call_mmap() will rewrite vma->vm_file and vma->vm_ops to upper layer's,
so here recover to overlay's in order to jump into this ovl_fault()
in other page-faults.


+		}
+
+		retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
+		if (retry_allowed)
+			vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;

here, we disallow retry in real ->fault because retry will unlock 
mmap_lock, touching vma after unlock is not safe behavior.


+		vma->vm_file = realfile;
+		ret = ovl_get_real_vmops(file)->fault(vmf);


calling real fault handler.


+		vma->vm_file = file;
+		if (retry_allowed)
+			vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;


recover vm_file and vm_ops to overlay's so that we can jump into
ovl_fault in other page-faults.


+		return ret;
+
+	} else {



Thanks,
cgxu




^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-09-01 13:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
2020-08-30 11:33   ` Amir Goldstein
2020-08-31 13:47     ` cgxu
2020-08-31 15:51       ` Amir Goldstein
2020-09-01  7:44         ` Miklos Szeredi
2020-09-01 13:20         ` cgxu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).