All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/3] ovl: stacked mmap for shared map
@ 2020-08-29  9:50 Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:50 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Currently, there is still ro/rw inconsistency related to shared mmap
in overlayfs, this patch set implements stacked mmap for shared map
and transfer necessary operations to upper inode, so that we can keep
data consistency in any kind of mmap. 

Patch 1 exports necessary functions from kernel to module.
Patch 2 introduces struct ovl_file_entry to store real vm_ops.
Patch 3 implements stacked mmap for shared map to keep data
consistency.

Chengguang Xu (3):
  mm: mmap: export necessary functions for overlayfs' mmap
  ovl: introduce struct ovl_file_entry
  ovl: implement stacked mmap for shared map

 fs/overlayfs/file.c | 178 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/mm.h  |   2 +
 mm/filemap.c        |  28 +++++++
 mm/internal.h       |  22 ------
 4 files changed, 195 insertions(+), 35 deletions(-)

-- 
2.20.1



^ permalink raw reply	[flat|nested] 15+ messages in thread

* [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
@ 2020-08-29  9:50 ` Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
  2 siblings, 0 replies; 15+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:50 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

In shared mode mmap, if overlayfs' inode does not have data
in upper layer, it should call maybe_unlock_mmap_for_io()
to release lock and waiting for IO in ->fault handler.
Meanwhile, in order to avoid endless retry we should also
check flag FAULT_FLAG_TRIED carefully in ->fault handler.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 include/linux/mm.h |  2 ++
 mm/filemap.c       | 28 ++++++++++++++++++++++++++++
 mm/internal.h      | 22 ----------------------
 3 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..214b23734eed 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1656,6 +1656,8 @@ void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
+struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, struct file *fpin);
+bool fault_flag_check(struct vm_fault *vmf, unsigned int flag);
 #else
 static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
diff --git a/mm/filemap.c b/mm/filemap.c
index f0ae9a6308cb..8a226f8ca262 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2704,6 +2704,34 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 	return generic_file_mmap(file, vma);
 }
+
+struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
+				      struct file *fpin)
+{
+	int flags = vmf->flags;
+
+	if (fpin)
+		return fpin;
+
+	/*
+	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
+	 * anything, so we only pin the file and drop the mmap_lock if only
+	 * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
+	 */
+	if (fault_flag_allow_retry_first(flags) &&
+	    !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+		fpin = get_file(vmf->vma->vm_file);
+		mmap_read_unlock(vmf->vma->vm_mm);
+	}
+	return fpin;
+}
+EXPORT_SYMBOL(maybe_unlock_mmap_for_io);
+
+bool fault_flag_check(struct vm_fault *vmf, unsigned int flag)
+{
+	return vmf->flags & flag;
+}
+EXPORT_SYMBOL(fault_flag_check);
 #else
 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
diff --git a/mm/internal.h b/mm/internal.h
index 9886db20d94f..ef19235c6bf1 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -402,28 +402,6 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 
 	return max(start, vma->vm_start);
 }
-
-static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
-						    struct file *fpin)
-{
-	int flags = vmf->flags;
-
-	if (fpin)
-		return fpin;
-
-	/*
-	 * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
-	 * anything, so we only pin the file and drop the mmap_lock if only
-	 * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
-	 */
-	if (fault_flag_allow_retry_first(flags) &&
-	    !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
-		fpin = get_file(vmf->vma->vm_file);
-		mmap_read_unlock(vmf->vma->vm_mm);
-	}
-	return fpin;
-}
-
 #else /* !CONFIG_MMU */
 static inline void clear_page_mlock(struct page *page) { }
 static inline void mlock_vma_page(struct page *page) { }
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
@ 2020-08-29  9:51 ` Chengguang Xu
  2020-08-29 12:16   ` kernel test robot
  2020-08-29 12:16   ` [RFC PATCH] ovl: ovl_get_realfile() can be static kernel test robot
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
  2 siblings, 2 replies; 15+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:51 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Introduce new struct ovl_file_entry to store real file
and real vm_ops handler.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/file.c | 64 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0d940e29d62b..14ab5344a918 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,6 +21,39 @@ struct ovl_aio_req {
 	struct fd fd;
 };
 
+struct ovl_file_entry {
+	struct file *realfile;
+	void *vm_ops;
+};
+
+struct file *ovl_get_realfile(struct file *file)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	return ofe->realfile;
+}
+
+void ovl_set_realfile(struct file *file, struct file *realfile)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	ofe->realfile = realfile;
+}
+
+void *ovl_get_real_vmops(struct file *file)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	return ofe->vm_ops;
+}
+
+void ovl_set_real_vmops(struct file *file, void *vm_ops)
+{
+	struct ovl_file_entry *ofe = file->private_data;
+
+	ofe->vm_ops = vm_ops;
+}
+
 static struct kmem_cache *ovl_aio_request_cachep;
 
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
@@ -105,14 +138,14 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
 	return 0;
 }
 
-static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
+static int ovl_real_fdget_meta(struct file *file, struct fd *real,
 			       bool allow_meta)
 {
 	struct inode *inode = file_inode(file);
 	struct inode *realinode;
 
 	real->flags = 0;
-	real->file = file->private_data;
+	real->file = ovl_get_realfile(file);
 
 	if (allow_meta)
 		realinode = ovl_inode_real(inode);
@@ -134,7 +167,7 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 	return 0;
 }
 
-static int ovl_real_fdget(const struct file *file, struct fd *real)
+static int ovl_real_fdget(struct file *file, struct fd *real)
 {
 	return ovl_real_fdget_meta(file, real, false);
 }
@@ -144,25 +177,36 @@ static int ovl_open(struct inode *inode, struct file *file)
 	struct file *realfile;
 	int err;
 
+	file->private_data = kzalloc(sizeof(struct ovl_file_entry), GFP_KERNEL);
+	if (!file->private_data)
+		return -ENOMEM;
+
 	err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
 	if (err)
-		return err;
+		goto out;
 
 	/* No longer need these flags, so don't pass them on to underlying fs */
 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
-	if (IS_ERR(realfile))
-		return PTR_ERR(realfile);
-
-	file->private_data = realfile;
+	if (IS_ERR(realfile)) {
+		err = PTR_ERR(realfile);
+		goto out;
+	}
 
+	ovl_set_realfile(file, realfile);
 	return 0;
+out:
+	kfree(file->private_data);
+	file->private_data = NULL;
+	return err;
 }
 
 static int ovl_release(struct inode *inode, struct file *file)
 {
-	fput(file->private_data);
+	fput(ovl_get_realfile(file));
+	kfree(file->private_data);
+	file->private_data = NULL;
 
 	return 0;
 }
@@ -451,7 +495,7 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct file *realfile = file->private_data;
+	struct file *realfile = ovl_get_realfile(file);
 	const struct cred *old_cred;
 	int ret;
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
  2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
@ 2020-08-29  9:51 ` Chengguang Xu
  2020-08-29 11:35   ` kernel test robot
  2020-08-30 11:33     ` Amir Goldstein
  2 siblings, 2 replies; 15+ messages in thread
From: Chengguang Xu @ 2020-08-29  9:51 UTC (permalink / raw)
  To: linux-unionfs, linux-mm; +Cc: miklos, akpm, amir73il, riteshh, Chengguang Xu

Implement stacked mmap for shared map to keep data
consistency.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 114 insertions(+), 6 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 14ab5344a918..db5ab200d984 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,9 +21,17 @@ struct ovl_aio_req {
 	struct fd fd;
 };
 
+static vm_fault_t ovl_fault(struct vm_fault *vmf);
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
+
+static const struct vm_operations_struct ovl_vm_ops = {
+	.fault		= ovl_fault,
+	.page_mkwrite	= ovl_page_mkwrite,
+};
+
 struct ovl_file_entry {
 	struct file *realfile;
-	void *vm_ops;
+	const struct vm_operations_struct *vm_ops;
 };
 
 struct file *ovl_get_realfile(struct file *file)
@@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
 	ofe->realfile = realfile;
 }
 
-void *ovl_get_real_vmops(struct file *file)
+const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	return ofe->vm_ops;
 }
 
-void ovl_set_real_vmops(struct file *file, void *vm_ops)
+void ovl_set_real_vmops(struct file *file,
+			const struct vm_operations_struct *vm_ops)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
@@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+vm_fault_t ovl_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct file *fpin, *tmp;
+	struct inode *inode = file_inode(file);
+	struct inode *realinode;
+	const struct cred *old_cred;
+	bool retry_allowed;
+	vm_fault_t ret;
+	int err = 0;
+
+	if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
+		realfile = ovl_get_realfile(file);
+
+		if (!ovl_has_upperdata(inode) ||
+		    realfile->f_inode != ovl_inode_upper(inode) ||
+		    !realfile->f_op->mmap)
+			return VM_FAULT_SIGBUS;
+
+		if (!ovl_get_real_vmops(file)) {
+			old_cred = ovl_override_creds(inode->i_sb);
+			err = call_mmap(realfile, vma);
+			revert_creds(old_cred);
+
+			vma->vm_file = file;
+			if (err) {
+				vma->vm_ops = &ovl_vm_ops;
+				return VM_FAULT_SIGBUS;
+			}
+			ovl_set_real_vmops(file, vma->vm_ops);
+			vma->vm_ops = &ovl_vm_ops;
+		}
+
+		retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
+		if (retry_allowed)
+			vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		vma->vm_file = realfile;
+		ret = ovl_get_real_vmops(file)->fault(vmf);
+		vma->vm_file = file;
+		if (retry_allowed)
+			vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
+		return ret;
+
+	} else {
+		fpin = maybe_unlock_mmap_for_io(vmf, NULL);
+		if (!fpin)
+			return VM_FAULT_SIGBUS;
+
+		ret = VM_FAULT_RETRY;
+		if (!ovl_has_upperdata(inode)) {
+			err = ovl_copy_up_with_data(file->f_path.dentry);
+			if (err)
+				goto out;
+		}
+
+		realinode = ovl_inode_realdata(inode);
+		realfile = ovl_open_realfile(file, realinode);
+		if (IS_ERR(realfile))
+			goto out;
+
+		tmp = ovl_get_realfile(file);
+		ovl_set_realfile(file, realfile);
+		fput(tmp);
+
+out:
+		fput(fpin);
+		return ret;
+	}
+}
+
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct inode *inode = file_inode(file);
+	vm_fault_t ret;
+
+	realfile = ovl_get_realfile(file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+
+	vma->vm_file = realfile;
+	ret = ovl_get_real_vmops(file)->page_mkwrite(vmf);
+	vma->vm_file = file;
+
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = ovl_get_realfile(file);
 	const struct cred *old_cred;
-	int ret;
+	int ret = 0;
 
 	if (!realfile->f_op->mmap)
 		return -ENODEV;
@@ -505,6 +607,13 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	if (WARN_ON(file != vma->vm_file))
 		return -EIO;
 
+	if (!ovl_has_upperdata(file_inode(file)) &&
+	    (vma->vm_flags & (VM_SHARED|VM_MAYSHARE))) {
+		vma->vm_ops = &ovl_vm_ops;
+		ovl_file_accessed(file);
+		return 0;
+	}
+
 	vma->vm_file = get_file(realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
@@ -517,10 +626,9 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	} else {
 		/* Drop reference count from previous vm_file value */
 		fput(file);
+		ovl_file_accessed(file);
 	}
 
-	ovl_file_accessed(file);
-
 	return ret;
 }
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
@ 2020-08-29 11:35   ` kernel test robot
  2020-08-30 11:33     ` Amir Goldstein
  1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2020-08-29 11:35 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 5572 bytes --]

Hi Chengguang,

[FYI, it's a private test report for your RFC patch.]
[auto build test ERROR on miklos-vfs/overlayfs-next]
[also build test ERROR on v5.9-rc2 next-20200828]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Chengguang-Xu/ovl-stacked-mmap-for-shared-map/20200829-175321
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git overlayfs-next
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=sh 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   fs/overlayfs/file.c:37:14: warning: no previous prototype for 'ovl_get_realfile' [-Wmissing-prototypes]
      37 | struct file *ovl_get_realfile(struct file *file)
         |              ^~~~~~~~~~~~~~~~
   fs/overlayfs/file.c:44:6: warning: no previous prototype for 'ovl_set_realfile' [-Wmissing-prototypes]
      44 | void ovl_set_realfile(struct file *file, struct file *realfile)
         |      ^~~~~~~~~~~~~~~~
   fs/overlayfs/file.c:51:36: warning: no previous prototype for 'ovl_get_real_vmops' [-Wmissing-prototypes]
      51 | const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
         |                                    ^~~~~~~~~~~~~~~~~~
   fs/overlayfs/file.c:58:6: warning: no previous prototype for 'ovl_set_real_vmops' [-Wmissing-prototypes]
      58 | void ovl_set_real_vmops(struct file *file,
         |      ^~~~~~~~~~~~~~~~~~
   fs/overlayfs/file.c: In function 'ovl_fault':
>> fs/overlayfs/file.c:518:6: error: implicit declaration of function 'fault_flag_check' [-Werror=implicit-function-declaration]
     518 |  if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
         |      ^~~~~~~~~~~~~~~~
>> fs/overlayfs/file.c:551:10: error: implicit declaration of function 'maybe_unlock_mmap_for_io' [-Werror=implicit-function-declaration]
     551 |   fpin = maybe_unlock_mmap_for_io(vmf, NULL);
         |          ^~~~~~~~~~~~~~~~~~~~~~~~
>> fs/overlayfs/file.c:551:8: warning: assignment to 'struct file *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
     551 |   fpin = maybe_unlock_mmap_for_io(vmf, NULL);
         |        ^
   cc1: some warnings being treated as errors

# https://github.com/0day-ci/linux/commit/a33b1899ccf797aa55f56333a2700cfbc67487cc
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Chengguang-Xu/ovl-stacked-mmap-for-shared-map/20200829-175321
git checkout a33b1899ccf797aa55f56333a2700cfbc67487cc
vim +/fault_flag_check +518 fs/overlayfs/file.c

   504	
   505	vm_fault_t ovl_fault(struct vm_fault *vmf)
   506	{
   507		struct vm_area_struct *vma = vmf->vma;
   508		struct file *file = vma->vm_file;
   509		struct file *realfile;
   510		struct file *fpin, *tmp;
   511		struct inode *inode = file_inode(file);
   512		struct inode *realinode;
   513		const struct cred *old_cred;
   514		bool retry_allowed;
   515		vm_fault_t ret;
   516		int err = 0;
   517	
 > 518		if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
   519			realfile = ovl_get_realfile(file);
   520	
   521			if (!ovl_has_upperdata(inode) ||
   522			    realfile->f_inode != ovl_inode_upper(inode) ||
   523			    !realfile->f_op->mmap)
   524				return VM_FAULT_SIGBUS;
   525	
   526			if (!ovl_get_real_vmops(file)) {
   527				old_cred = ovl_override_creds(inode->i_sb);
   528				err = call_mmap(realfile, vma);
   529				revert_creds(old_cred);
   530	
   531				vma->vm_file = file;
   532				if (err) {
   533					vma->vm_ops = &ovl_vm_ops;
   534					return VM_FAULT_SIGBUS;
   535				}
   536				ovl_set_real_vmops(file, vma->vm_ops);
   537				vma->vm_ops = &ovl_vm_ops;
   538			}
   539	
   540			retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
   541			if (retry_allowed)
   542				vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
   543			vma->vm_file = realfile;
   544			ret = ovl_get_real_vmops(file)->fault(vmf);
   545			vma->vm_file = file;
   546			if (retry_allowed)
   547				vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
   548			return ret;
   549	
   550		} else {
 > 551			fpin = maybe_unlock_mmap_for_io(vmf, NULL);
   552			if (!fpin)
   553				return VM_FAULT_SIGBUS;
   554	
   555			ret = VM_FAULT_RETRY;
   556			if (!ovl_has_upperdata(inode)) {
   557				err = ovl_copy_up_with_data(file->f_path.dentry);
   558				if (err)
   559					goto out;
   560			}
   561	
   562			realinode = ovl_inode_realdata(inode);
   563			realfile = ovl_open_realfile(file, realinode);
   564			if (IS_ERR(realfile))
   565				goto out;
   566	
   567			tmp = ovl_get_realfile(file);
   568			ovl_set_realfile(file, realfile);
   569			fput(tmp);
   570	
   571	out:
   572			fput(fpin);
   573			return ret;
   574		}
   575	}
   576	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 55735 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
@ 2020-08-29 12:16   ` kernel test robot
  2020-08-29 12:16   ` [RFC PATCH] ovl: ovl_get_realfile() can be static kernel test robot
  1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2020-08-29 12:16 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1997 bytes --]

Hi Chengguang,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on miklos-vfs/overlayfs-next]
[also build test WARNING on v5.9-rc2 next-20200828]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Chengguang-Xu/ovl-stacked-mmap-for-shared-map/20200829-175321
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git overlayfs-next
config: i386-randconfig-s002-20200829 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.2-191-g10164920-dirty
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> fs/overlayfs/file.c:29:13: sparse: sparse: symbol 'ovl_get_realfile' was not declared. Should it be static?
>> fs/overlayfs/file.c:36:6: sparse: sparse: symbol 'ovl_set_realfile' was not declared. Should it be static?
>> fs/overlayfs/file.c:43:6: sparse: sparse: symbol 'ovl_get_real_vmops' was not declared. Should it be static?
>> fs/overlayfs/file.c:50:6: sparse: sparse: symbol 'ovl_set_real_vmops' was not declared. Should it be static?
   fs/overlayfs/file.c:78:37: sparse: sparse: restricted fmode_t degrades to integer
   fs/overlayfs/file.c:111:18: sparse: sparse: restricted fmode_t degrades to integer
   fs/overlayfs/file.c:164:13: sparse: sparse: restricted fmode_t degrades to integer

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 33531 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [RFC PATCH] ovl: ovl_get_realfile() can be static
  2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
  2020-08-29 12:16   ` kernel test robot
@ 2020-08-29 12:16   ` kernel test robot
  1 sibling, 0 replies; 15+ messages in thread
From: kernel test robot @ 2020-08-29 12:16 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1163 bytes --]


Signed-off-by: kernel test robot <lkp@intel.com>
---
 file.c |    8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 14ab5344a91833..71ef43a140a55f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -26,28 +26,28 @@ struct ovl_file_entry {
 	void *vm_ops;
 };
 
-struct file *ovl_get_realfile(struct file *file)
+static struct file *ovl_get_realfile(struct file *file)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	return ofe->realfile;
 }
 
-void ovl_set_realfile(struct file *file, struct file *realfile)
+static void ovl_set_realfile(struct file *file, struct file *realfile)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	ofe->realfile = realfile;
 }
 
-void *ovl_get_real_vmops(struct file *file)
+static void *ovl_get_real_vmops(struct file *file)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	return ofe->vm_ops;
 }
 
-void ovl_set_real_vmops(struct file *file, void *vm_ops)
+static void ovl_set_real_vmops(struct file *file, void *vm_ops)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
@ 2020-08-30 11:33     ` Amir Goldstein
  2020-08-30 11:33     ` Amir Goldstein
  1 sibling, 0 replies; 15+ messages in thread
From: Amir Goldstein @ 2020-08-30 11:33 UTC (permalink / raw)
  To: Chengguang Xu
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Implement stacked mmap for shared map to keep data
> consistency.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
>  fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 14ab5344a918..db5ab200d984 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>         struct fd fd;
>  };
>
> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> +
> +static const struct vm_operations_struct ovl_vm_ops = {
> +       .fault          = ovl_fault,
> +       .page_mkwrite   = ovl_page_mkwrite,
> +};
> +

Interesting direction, not sure if this is workable.
I don't know enough about mm to say.

But what about the rest of the operations?
Did you go over them and decide that overlay doesn't need to implement them?
I doubt it, but if you did, please document that.

>  struct ovl_file_entry {
>         struct file *realfile;
> -       void *vm_ops;
> +       const struct vm_operations_struct *vm_ops;
>  };
>
>  struct file *ovl_get_realfile(struct file *file)
> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>         ofe->realfile = realfile;
>  }
>
> -void *ovl_get_real_vmops(struct file *file)
> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
>         return ofe->vm_ops;
>  }
>
> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> +void ovl_set_real_vmops(struct file *file,
> +                       const struct vm_operations_struct *vm_ops)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>         return ret;
>  }
>
> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> +{
> +       struct vm_area_struct *vma = vmf->vma;
> +       struct file *file = vma->vm_file;
> +       struct file *realfile;
> +       struct file *fpin, *tmp;
> +       struct inode *inode = file_inode(file);
> +       struct inode *realinode;
> +       const struct cred *old_cred;
> +       bool retry_allowed;
> +       vm_fault_t ret;
> +       int err = 0;
> +
> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> +               realfile = ovl_get_realfile(file);
> +
> +               if (!ovl_has_upperdata(inode) ||
> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> +                   !realfile->f_op->mmap)
> +                       return VM_FAULT_SIGBUS;
> +
> +               if (!ovl_get_real_vmops(file)) {
> +                       old_cred = ovl_override_creds(inode->i_sb);
> +                       err = call_mmap(realfile, vma);
> +                       revert_creds(old_cred);
> +
> +                       vma->vm_file = file;
> +                       if (err) {
> +                               vma->vm_ops = &ovl_vm_ops;
> +                               return VM_FAULT_SIGBUS;
> +                       }
> +                       ovl_set_real_vmops(file, vma->vm_ops);
> +                       vma->vm_ops = &ovl_vm_ops;
> +               }
> +
> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> +               if (retry_allowed)
> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> +               vma->vm_file = realfile;
> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> +               vma->vm_file = file;
> +               if (retry_allowed)
> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> +               return ret;
> +
> +       } else {
> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> +               if (!fpin)
> +                       return VM_FAULT_SIGBUS;
> +
> +               ret = VM_FAULT_RETRY;
> +               if (!ovl_has_upperdata(inode)) {
> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> +                       if (err)
> +                               goto out;
> +               }
> +
> +               realinode = ovl_inode_realdata(inode);
> +               realfile = ovl_open_realfile(file, realinode);
> +               if (IS_ERR(realfile))
> +                       goto out;
> +
> +               tmp = ovl_get_realfile(file);
> +               ovl_set_realfile(file, realfile);
> +               fput(tmp);
> +
> +out:
> +               fput(fpin);
> +               return ret;
> +       }
> +}


Please add some documentation to explain the method used.
Do we need to retry if real_vmops are already set?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
@ 2020-08-30 11:33     ` Amir Goldstein
  0 siblings, 0 replies; 15+ messages in thread
From: Amir Goldstein @ 2020-08-30 11:33 UTC (permalink / raw)
  To: Chengguang Xu
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Implement stacked mmap for shared map to keep data
> consistency.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
>  fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> index 14ab5344a918..db5ab200d984 100644
> --- a/fs/overlayfs/file.c
> +++ b/fs/overlayfs/file.c
> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>         struct fd fd;
>  };
>
> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> +
> +static const struct vm_operations_struct ovl_vm_ops = {
> +       .fault          = ovl_fault,
> +       .page_mkwrite   = ovl_page_mkwrite,
> +};
> +

Interesting direction, not sure if this is workable.
I don't know enough about mm to say.

But what about the rest of the operations?
Did you go over them and decide that overlay doesn't need to implement them?
I doubt it, but if you did, please document that.

>  struct ovl_file_entry {
>         struct file *realfile;
> -       void *vm_ops;
> +       const struct vm_operations_struct *vm_ops;
>  };
>
>  struct file *ovl_get_realfile(struct file *file)
> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>         ofe->realfile = realfile;
>  }
>
> -void *ovl_get_real_vmops(struct file *file)
> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
>         return ofe->vm_ops;
>  }
>
> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> +void ovl_set_real_vmops(struct file *file,
> +                       const struct vm_operations_struct *vm_ops)
>  {
>         struct ovl_file_entry *ofe = file->private_data;
>
> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>         return ret;
>  }
>
> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> +{
> +       struct vm_area_struct *vma = vmf->vma;
> +       struct file *file = vma->vm_file;
> +       struct file *realfile;
> +       struct file *fpin, *tmp;
> +       struct inode *inode = file_inode(file);
> +       struct inode *realinode;
> +       const struct cred *old_cred;
> +       bool retry_allowed;
> +       vm_fault_t ret;
> +       int err = 0;
> +
> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> +               realfile = ovl_get_realfile(file);
> +
> +               if (!ovl_has_upperdata(inode) ||
> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> +                   !realfile->f_op->mmap)
> +                       return VM_FAULT_SIGBUS;
> +
> +               if (!ovl_get_real_vmops(file)) {
> +                       old_cred = ovl_override_creds(inode->i_sb);
> +                       err = call_mmap(realfile, vma);
> +                       revert_creds(old_cred);
> +
> +                       vma->vm_file = file;
> +                       if (err) {
> +                               vma->vm_ops = &ovl_vm_ops;
> +                               return VM_FAULT_SIGBUS;
> +                       }
> +                       ovl_set_real_vmops(file, vma->vm_ops);
> +                       vma->vm_ops = &ovl_vm_ops;
> +               }
> +
> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> +               if (retry_allowed)
> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> +               vma->vm_file = realfile;
> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> +               vma->vm_file = file;
> +               if (retry_allowed)
> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> +               return ret;
> +
> +       } else {
> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> +               if (!fpin)
> +                       return VM_FAULT_SIGBUS;
> +
> +               ret = VM_FAULT_RETRY;
> +               if (!ovl_has_upperdata(inode)) {
> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> +                       if (err)
> +                               goto out;
> +               }
> +
> +               realinode = ovl_inode_realdata(inode);
> +               realfile = ovl_open_realfile(file, realinode);
> +               if (IS_ERR(realfile))
> +                       goto out;
> +
> +               tmp = ovl_get_realfile(file);
> +               ovl_set_realfile(file, realfile);
> +               fput(tmp);
> +
> +out:
> +               fput(fpin);
> +               return ret;
> +       }
> +}


Please add some documentation to explain the method used.
Do we need to retry if real_vmops are already set?

Thanks,
Amir.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-30 11:33     ` Amir Goldstein
  (?)
@ 2020-08-31 13:47     ` cgxu
  2020-08-31 15:51         ` Amir Goldstein
  -1 siblings, 1 reply; 15+ messages in thread
From: cgxu @ 2020-08-31 13:47 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On 8/30/20 7:33 PM, Amir Goldstein wrote:
> On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>>
>> Implement stacked mmap for shared map to keep data
>> consistency.
>>
>> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>> ---
>>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>>   1 file changed, 114 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> index 14ab5344a918..db5ab200d984 100644
>> --- a/fs/overlayfs/file.c
>> +++ b/fs/overlayfs/file.c
>> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>>          struct fd fd;
>>   };
>>
>> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
>> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
>> +
>> +static const struct vm_operations_struct ovl_vm_ops = {
>> +       .fault          = ovl_fault,
>> +       .page_mkwrite   = ovl_page_mkwrite,
>> +};
>> +
> 
> Interesting direction, not sure if this is workable.
> I don't know enough about mm to say.
> 
> But what about the rest of the operations?
> Did you go over them and decide that overlay doesn't need to implement them?
> I doubt it, but if you did, please document that.

I did some check for rest of them, IIUC ->fault will be enough for this 
special case (shared read-only mmap with no upper), I will remove 
->page_mkwrite in v2.

# I do not consider support ->huge_fault in current stage due to many fs 
cannot support DAX properly.

BTW, do you know who should I add to CC list for further deep review of
this code? fadevel-list?



> 
>>   struct ovl_file_entry {
>>          struct file *realfile;
>> -       void *vm_ops;
>> +       const struct vm_operations_struct *vm_ops;
>>   };
>>
>>   struct file *ovl_get_realfile(struct file *file)
>> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>>          ofe->realfile = realfile;
>>   }
>>
>> -void *ovl_get_real_vmops(struct file *file)
>> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>>   {
>>          struct ovl_file_entry *ofe = file->private_data;
>>
>>          return ofe->vm_ops;
>>   }
>>
>> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
>> +void ovl_set_real_vmops(struct file *file,
>> +                       const struct vm_operations_struct *vm_ops)
>>   {
>>          struct ovl_file_entry *ofe = file->private_data;
>>
>> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>>          return ret;
>>   }
>>
>> +vm_fault_t ovl_fault(struct vm_fault *vmf)
>> +{
>> +       struct vm_area_struct *vma = vmf->vma;
>> +       struct file *file = vma->vm_file;
>> +       struct file *realfile;
>> +       struct file *fpin, *tmp;
>> +       struct inode *inode = file_inode(file);
>> +       struct inode *realinode;
>> +       const struct cred *old_cred;
>> +       bool retry_allowed;
>> +       vm_fault_t ret;
>> +       int err = 0;
>> +
>> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
>> +               realfile = ovl_get_realfile(file);
>> +
>> +               if (!ovl_has_upperdata(inode) ||
>> +                   realfile->f_inode != ovl_inode_upper(inode) ||
>> +                   !realfile->f_op->mmap)
>> +                       return VM_FAULT_SIGBUS;
>> +
>> +               if (!ovl_get_real_vmops(file)) {
>> +                       old_cred = ovl_override_creds(inode->i_sb);
>> +                       err = call_mmap(realfile, vma);
>> +                       revert_creds(old_cred);
>> +
>> +                       vma->vm_file = file;
>> +                       if (err) {
>> +                               vma->vm_ops = &ovl_vm_ops;
>> +                               return VM_FAULT_SIGBUS;
>> +                       }
>> +                       ovl_set_real_vmops(file, vma->vm_ops);
>> +                       vma->vm_ops = &ovl_vm_ops;
>> +               }
>> +
>> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
>> +               if (retry_allowed)
>> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
>> +               vma->vm_file = realfile;
>> +               ret = ovl_get_real_vmops(file)->fault(vmf);
>> +               vma->vm_file = file;
>> +               if (retry_allowed)
>> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
>> +               return ret;
>> +
>> +       } else {
>> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
>> +               if (!fpin)
>> +                       return VM_FAULT_SIGBUS;
>> +
>> +               ret = VM_FAULT_RETRY;
>> +               if (!ovl_has_upperdata(inode)) {
>> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
>> +                       if (err)
>> +                               goto out;
>> +               }
>> +
>> +               realinode = ovl_inode_realdata(inode);
>> +               realfile = ovl_open_realfile(file, realinode);
>> +               if (IS_ERR(realfile))
>> +                       goto out;
>> +
>> +               tmp = ovl_get_realfile(file);
>> +               ovl_set_realfile(file, realfile);
>> +               fput(tmp);
>> +
>> +out:
>> +               fput(fpin);
>> +               return ret;
>> +       }
>> +}
> 
> 
> Please add some documentation to explain the method used.
> Do we need to retry if real_vmops are already set?
> 

Good catch, actually retry is not needed in that case.

Basically, we unlock(mmap_lock)->copy-up->open when
detecting no upper inode then retry fault operation.
However, we need to check fault retry flag carefully
for avoiding endless retry.

I'll add more explanation in v2.

---
cgxu





^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 13:47     ` cgxu
@ 2020-08-31 15:51         ` Amir Goldstein
  0 siblings, 0 replies; 15+ messages in thread
From: Amir Goldstein @ 2020-08-31 15:51 UTC (permalink / raw)
  To: cgxu; +Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
>
> On 8/30/20 7:33 PM, Amir Goldstein wrote:
> > On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
> >>
> >> Implement stacked mmap for shared map to keep data
> >> consistency.
> >>
> >> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> >> ---
> >>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 114 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 14ab5344a918..db5ab200d984 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -21,9 +21,17 @@ struct ovl_aio_req {
> >>          struct fd fd;
> >>   };
> >>
> >> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> >> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> >> +
> >> +static const struct vm_operations_struct ovl_vm_ops = {
> >> +       .fault          = ovl_fault,
> >> +       .page_mkwrite   = ovl_page_mkwrite,
> >> +};
> >> +
> >
> > Interesting direction, not sure if this is workable.
> > I don't know enough about mm to say.
> >
> > But what about the rest of the operations?
> > Did you go over them and decide that overlay doesn't need to implement them?
> > I doubt it, but if you did, please document that.
>
> I did some check for rest of them, IIUC ->fault will be enough for this
> special case (shared read-only mmap with no upper), I will remove
> ->page_mkwrite in v2.

Ok I suppose you checked that ->map_pages is not relevant?

>
> # I do not consider support ->huge_fault in current stage due to many fs
> cannot support DAX properly.
>
> BTW, do you know who should I add to CC list for further deep review of
> this code? fadevel-list?
>

fsdevel would be good, but I would wait for initial feedback from Miklos
before you post v2...

>
>
> >
> >>   struct ovl_file_entry {
> >>          struct file *realfile;
> >> -       void *vm_ops;
> >> +       const struct vm_operations_struct *vm_ops;
> >>   };
> >>
> >>   struct file *ovl_get_realfile(struct file *file)
> >> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
> >>          ofe->realfile = realfile;
> >>   }
> >>
> >> -void *ovl_get_real_vmops(struct file *file)
> >> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >>          return ofe->vm_ops;
> >>   }
> >>
> >> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> >> +void ovl_set_real_vmops(struct file *file,
> >> +                       const struct vm_operations_struct *vm_ops)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>          return ret;
> >>   }
> >>
> >> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> >> +{
> >> +       struct vm_area_struct *vma = vmf->vma;
> >> +       struct file *file = vma->vm_file;
> >> +       struct file *realfile;
> >> +       struct file *fpin, *tmp;
> >> +       struct inode *inode = file_inode(file);
> >> +       struct inode *realinode;
> >> +       const struct cred *old_cred;
> >> +       bool retry_allowed;
> >> +       vm_fault_t ret;
> >> +       int err = 0;
> >> +
> >> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> >> +               realfile = ovl_get_realfile(file);
> >> +
> >> +               if (!ovl_has_upperdata(inode) ||
> >> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> >> +                   !realfile->f_op->mmap)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               if (!ovl_get_real_vmops(file)) {
> >> +                       old_cred = ovl_override_creds(inode->i_sb);
> >> +                       err = call_mmap(realfile, vma);
> >> +                       revert_creds(old_cred);
> >> +
> >> +                       vma->vm_file = file;
> >> +                       if (err) {
> >> +                               vma->vm_ops = &ovl_vm_ops;
> >> +                               return VM_FAULT_SIGBUS;
> >> +                       }
> >> +                       ovl_set_real_vmops(file, vma->vm_ops);
> >> +                       vma->vm_ops = &ovl_vm_ops;
> >> +               }
> >> +
> >> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> >> +               vma->vm_file = realfile;
> >> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> >> +               vma->vm_file = file;
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> >> +               return ret;
> >> +
> >> +       } else {
> >> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> >> +               if (!fpin)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               ret = VM_FAULT_RETRY;
> >> +               if (!ovl_has_upperdata(inode)) {
> >> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> >> +                       if (err)
> >> +                               goto out;
> >> +               }
> >> +
> >> +               realinode = ovl_inode_realdata(inode);
> >> +               realfile = ovl_open_realfile(file, realinode);
> >> +               if (IS_ERR(realfile))
> >> +                       goto out;
> >> +
> >> +               tmp = ovl_get_realfile(file);
> >> +               ovl_set_realfile(file, realfile);
> >> +               fput(tmp);
> >> +
> >> +out:
> >> +               fput(fpin);
> >> +               return ret;
> >> +       }
> >> +}
> >
> >
> > Please add some documentation to explain the method used.
> > Do we need to retry if real_vmops are already set?
> >
>
> Good catch, actually retry is not needed in that case.
>
> Basically, we unlock(mmap_lock)->copy-up->open when
> detecting no upper inode then retry fault operation.
> However, we need to check fault retry flag carefully
> for avoiding endless retry.

That much I got, but the details of setting ->vm_file and vmops
look subtle, so better explain them.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
@ 2020-08-31 15:51         ` Amir Goldstein
  0 siblings, 0 replies; 15+ messages in thread
From: Amir Goldstein @ 2020-08-31 15:51 UTC (permalink / raw)
  To: cgxu; +Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
>
> On 8/30/20 7:33 PM, Amir Goldstein wrote:
> > On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
> >>
> >> Implement stacked mmap for shared map to keep data
> >> consistency.
> >>
> >> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> >> ---
> >>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 114 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 14ab5344a918..db5ab200d984 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -21,9 +21,17 @@ struct ovl_aio_req {
> >>          struct fd fd;
> >>   };
> >>
> >> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> >> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> >> +
> >> +static const struct vm_operations_struct ovl_vm_ops = {
> >> +       .fault          = ovl_fault,
> >> +       .page_mkwrite   = ovl_page_mkwrite,
> >> +};
> >> +
> >
> > Interesting direction, not sure if this is workable.
> > I don't know enough about mm to say.
> >
> > But what about the rest of the operations?
> > Did you go over them and decide that overlay doesn't need to implement them?
> > I doubt it, but if you did, please document that.
>
> I did some check for rest of them, IIUC ->fault will be enough for this
> special case (shared read-only mmap with no upper), I will remove
> ->page_mkwrite in v2.

Ok I suppose you checked that ->map_pages is not relevant?

>
> # I do not consider support ->huge_fault in current stage due to many fs
> cannot support DAX properly.
>
> BTW, do you know who should I add to CC list for further deep review of
> this code? fadevel-list?
>

fsdevel would be good, but I would wait for initial feedback from Miklos
before you post v2...

>
>
> >
> >>   struct ovl_file_entry {
> >>          struct file *realfile;
> >> -       void *vm_ops;
> >> +       const struct vm_operations_struct *vm_ops;
> >>   };
> >>
> >>   struct file *ovl_get_realfile(struct file *file)
> >> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
> >>          ofe->realfile = realfile;
> >>   }
> >>
> >> -void *ovl_get_real_vmops(struct file *file)
> >> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >>          return ofe->vm_ops;
> >>   }
> >>
> >> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> >> +void ovl_set_real_vmops(struct file *file,
> >> +                       const struct vm_operations_struct *vm_ops)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>          return ret;
> >>   }
> >>
> >> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> >> +{
> >> +       struct vm_area_struct *vma = vmf->vma;
> >> +       struct file *file = vma->vm_file;
> >> +       struct file *realfile;
> >> +       struct file *fpin, *tmp;
> >> +       struct inode *inode = file_inode(file);
> >> +       struct inode *realinode;
> >> +       const struct cred *old_cred;
> >> +       bool retry_allowed;
> >> +       vm_fault_t ret;
> >> +       int err = 0;
> >> +
> >> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> >> +               realfile = ovl_get_realfile(file);
> >> +
> >> +               if (!ovl_has_upperdata(inode) ||
> >> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> >> +                   !realfile->f_op->mmap)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               if (!ovl_get_real_vmops(file)) {
> >> +                       old_cred = ovl_override_creds(inode->i_sb);
> >> +                       err = call_mmap(realfile, vma);
> >> +                       revert_creds(old_cred);
> >> +
> >> +                       vma->vm_file = file;
> >> +                       if (err) {
> >> +                               vma->vm_ops = &ovl_vm_ops;
> >> +                               return VM_FAULT_SIGBUS;
> >> +                       }
> >> +                       ovl_set_real_vmops(file, vma->vm_ops);
> >> +                       vma->vm_ops = &ovl_vm_ops;
> >> +               }
> >> +
> >> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> >> +               vma->vm_file = realfile;
> >> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> >> +               vma->vm_file = file;
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> >> +               return ret;
> >> +
> >> +       } else {
> >> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> >> +               if (!fpin)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               ret = VM_FAULT_RETRY;
> >> +               if (!ovl_has_upperdata(inode)) {
> >> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> >> +                       if (err)
> >> +                               goto out;
> >> +               }
> >> +
> >> +               realinode = ovl_inode_realdata(inode);
> >> +               realfile = ovl_open_realfile(file, realinode);
> >> +               if (IS_ERR(realfile))
> >> +                       goto out;
> >> +
> >> +               tmp = ovl_get_realfile(file);
> >> +               ovl_set_realfile(file, realfile);
> >> +               fput(tmp);
> >> +
> >> +out:
> >> +               fput(fpin);
> >> +               return ret;
> >> +       }
> >> +}
> >
> >
> > Please add some documentation to explain the method used.
> > Do we need to retry if real_vmops are already set?
> >
>
> Good catch, actually retry is not needed in that case.
>
> Basically, we unlock(mmap_lock)->copy-up->open when
> detecting no upper inode then retry fault operation.
> However, we need to check fault retry flag carefully
> for avoiding endless retry.

That much I got, but the details of setting ->vm_file and vmops
look subtle, so better explain them.

Thanks,
Amir.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 15:51         ` Amir Goldstein
@ 2020-09-01  7:44           ` Miklos Szeredi
  -1 siblings, 0 replies; 15+ messages in thread
From: Miklos Szeredi @ 2020-09-01  7:44 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: cgxu, overlayfs, Linux MM, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 5:52 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
> >
> > On 8/30/20 7:33 PM, Amir Goldstein wrote:

> > > Interesting direction, not sure if this is workable.
> > > I don't know enough about mm to say.
> > >
> > > But what about the rest of the operations?
> > > Did you go over them and decide that overlay doesn't need to implement them?
> > > I doubt it, but if you did, please document that.
> >
> > I did some check for rest of them, IIUC ->fault will be enough for this
> > special case (shared read-only mmap with no upper), I will remove
> > ->page_mkwrite in v2.

Hmm, so this is just for that specific corner case?  Not a generic
shared mmap implementation?

>
> Ok I suppose you checked that ->map_pages is not relevant?
>
> >
> > # I do not consider support ->huge_fault in current stage due to many fs
> > cannot support DAX properly.
> >
> > BTW, do you know who should I add to CC list for further deep review of
> > this code? fadevel-list?
> >
>
> fsdevel would be good, but I would wait for initial feedback from Miklos
> before you post v2...

I could dig into the details of page fault handling, but that's not an
area that I'm intimately familiar with.    So yeah, a high level
description of what happens on mmap, page fault, write fault, etc...
would be really appreciated.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
@ 2020-09-01  7:44           ` Miklos Szeredi
  0 siblings, 0 replies; 15+ messages in thread
From: Miklos Szeredi @ 2020-09-01  7:44 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: cgxu, overlayfs, Linux MM, Andrew Morton, Ritesh Harjani

On Mon, Aug 31, 2020 at 5:52 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
> >
> > On 8/30/20 7:33 PM, Amir Goldstein wrote:

> > > Interesting direction, not sure if this is workable.
> > > I don't know enough about mm to say.
> > >
> > > But what about the rest of the operations?
> > > Did you go over them and decide that overlay doesn't need to implement them?
> > > I doubt it, but if you did, please document that.
> >
> > I did some check for rest of them, IIUC ->fault will be enough for this
> > special case (shared read-only mmap with no upper), I will remove
> > ->page_mkwrite in v2.

Hmm, so this is just for that specific corner case?  Not a generic
shared mmap implementation?

>
> Ok I suppose you checked that ->map_pages is not relevant?
>
> >
> > # I do not consider support ->huge_fault in current stage due to many fs
> > cannot support DAX properly.
> >
> > BTW, do you know who should I add to CC list for further deep review of
> > this code? fadevel-list?
> >
>
> fsdevel would be good, but I would wait for initial feedback from Miklos
> before you post v2...

I could dig into the details of page fault handling, but that's not an
area that I'm intimately familiar with.    So yeah, a high level
description of what happens on mmap, page fault, write fault, etc...
would be really appreciated.

Thanks,
Miklos


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map
  2020-08-31 15:51         ` Amir Goldstein
  (?)
  (?)
@ 2020-09-01 13:20         ` cgxu
  -1 siblings, 0 replies; 15+ messages in thread
From: cgxu @ 2020-09-01 13:20 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: overlayfs, Linux MM, Miklos Szeredi, Andrew Morton, Ritesh Harjani

On 8/31/20 11:51 PM, Amir Goldstein wrote:
> On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@mykernel.net> wrote:
>>
>> On 8/30/20 7:33 PM, Amir Goldstein wrote:
>>> On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>>>>
>>>> Implement stacked mmap for shared map to keep data
>>>> consistency.
>>>>
>>>> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>>>> ---
>>>>    fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
>>>>    1 file changed, 114 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>>>> index 14ab5344a918..db5ab200d984 100644
>>>> --- a/fs/overlayfs/file.c
>>>> +++ b/fs/overlayfs/file.c
>>>> @@ -21,9 +21,17 @@ struct ovl_aio_req {
>>>>           struct fd fd;
>>>>    };
>>>>
>>>> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
>>>> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
>>>> +
>>>> +static const struct vm_operations_struct ovl_vm_ops = {
>>>> +       .fault          = ovl_fault,
>>>> +       .page_mkwrite   = ovl_page_mkwrite,
>>>> +};
>>>> +
>>>
>>> Interesting direction, not sure if this is workable.
>>> I don't know enough about mm to say.
>>>
>>> But what about the rest of the operations?
>>> Did you go over them and decide that overlay doesn't need to implement them?
>>> I doubt it, but if you did, please document that.
>>
>> I did some check for rest of them, IIUC ->fault will be enough for this
>> special case (shared read-only mmap with no upper), I will remove
>> ->page_mkwrite in v2.
> 
> Ok I suppose you checked that ->map_pages is not relevant?


->map_pages() does easy maps and fallback to ->fault if the offset is 
not ready, so I think without ->map_pages() it still could work 
properly, we can also implement it for acceleration.


> 
>>
>> # I do not consider support ->huge_fault in current stage due to many fs
>> cannot support DAX properly.
>>
>> BTW, do you know who should I add to CC list for further deep review of
>> this code? fadevel-list?
>>
> 
> fsdevel would be good, but I would wait for initial feedback from Miklos
> before you post v2...
> 
>>
>>
>>>
>>>>    struct ovl_file_entry {
>>>>           struct file *realfile;
>>>> -       void *vm_ops;
>>>> +       const struct vm_operations_struct *vm_ops;
>>>>    };
>>>>
>>>>    struct file *ovl_get_realfile(struct file *file)
>>>> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
>>>>           ofe->realfile = realfile;
>>>>    }
>>>>
>>>> -void *ovl_get_real_vmops(struct file *file)
>>>> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
>>>>    {
>>>>           struct ovl_file_entry *ofe = file->private_data;
>>>>
>>>>           return ofe->vm_ops;
>>>>    }
>>>>
>>>> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
>>>> +void ovl_set_real_vmops(struct file *file,
>>>> +                       const struct vm_operations_struct *vm_ops)
>>>>    {
>>>>           struct ovl_file_entry *ofe = file->private_data;
>>>>
>>>> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>>>>           return ret;
>>>>    }
>>>>
>>>> +vm_fault_t ovl_fault(struct vm_fault *vmf)
>>>> +{
>>>> +       struct vm_area_struct *vma = vmf->vma;
>>>> +       struct file *file = vma->vm_file;
>>>> +       struct file *realfile;
>>>> +       struct file *fpin, *tmp;
>>>> +       struct inode *inode = file_inode(file);
>>>> +       struct inode *realinode;
>>>> +       const struct cred *old_cred;
>>>> +       bool retry_allowed;
>>>> +       vm_fault_t ret;
>>>> +       int err = 0;
>>>> +
>>>> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
>>>> +               realfile = ovl_get_realfile(file);
>>>> +
>>>> +               if (!ovl_has_upperdata(inode) ||
>>>> +                   realfile->f_inode != ovl_inode_upper(inode) ||
>>>> +                   !realfile->f_op->mmap)
>>>> +                       return VM_FAULT_SIGBUS;
>>>> +
>>>> +               if (!ovl_get_real_vmops(file)) {
>>>> +                       old_cred = ovl_override_creds(inode->i_sb);
>>>> +                       err = call_mmap(realfile, vma);
>>>> +                       revert_creds(old_cred);
>>>> +
>>>> +                       vma->vm_file = file;
>>>> +                       if (err) {
>>>> +                               vma->vm_ops = &ovl_vm_ops;
>>>> +                               return VM_FAULT_SIGBUS;
>>>> +                       }
>>>> +                       ovl_set_real_vmops(file, vma->vm_ops);
>>>> +                       vma->vm_ops = &ovl_vm_ops;
>>>> +               }
>>>> +
>>>> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
>>>> +               if (retry_allowed)
>>>> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
>>>> +               vma->vm_file = realfile;
>>>> +               ret = ovl_get_real_vmops(file)->fault(vmf);
>>>> +               vma->vm_file = file;
>>>> +               if (retry_allowed)
>>>> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
>>>> +               return ret;
>>>> +
>>>> +       } else {
>>>> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
>>>> +               if (!fpin)
>>>> +                       return VM_FAULT_SIGBUS;
>>>> +
>>>> +               ret = VM_FAULT_RETRY;
>>>> +               if (!ovl_has_upperdata(inode)) {
>>>> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
>>>> +                       if (err)
>>>> +                               goto out;
>>>> +               }
>>>> +
>>>> +               realinode = ovl_inode_realdata(inode);
>>>> +               realfile = ovl_open_realfile(file, realinode);
>>>> +               if (IS_ERR(realfile))
>>>> +                       goto out;
>>>> +
>>>> +               tmp = ovl_get_realfile(file);
>>>> +               ovl_set_realfile(file, realfile);
>>>> +               fput(tmp);
>>>> +
>>>> +out:
>>>> +               fput(fpin);
>>>> +               return ret;
>>>> +       }
>>>> +}
>>>
>>>
>>> Please add some documentation to explain the method used.
>>> Do we need to retry if real_vmops are already set?
>>>
>>
>> Good catch, actually retry is not needed in that case.
>>
>> Basically, we unlock(mmap_lock)->copy-up->open when
>> detecting no upper inode then retry fault operation.
>> However, we need to check fault retry flag carefully
>> for avoiding endless retry.
> 
> That much I got, but the details of setting ->vm_file and vmops
> look subtle, so better explain them.
> 

I'll add some explanations in V2, but before that let me write some
comments based on code logic below. If there is still something not 
clear you can point out that again.



+	if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
+		realfile = ovl_get_realfile(file);
+
+		if (!ovl_has_upperdata(inode) ||
+		    realfile->f_inode != ovl_inode_upper(inode) ||
+		    !realfile->f_op->mmap)
+			return VM_FAULT_SIGBUS;

Above condition indicates (copy-up)/(open real-file) failed or
(real-file does not support mmap), so we have to return SIGBUS.



+
+		if (!ovl_get_real_vmops(file)) {
+			old_cred = ovl_override_creds(inode->i_sb);
+			err = call_mmap(realfile, vma);
+			revert_creds(old_cred);
+
+			vma->vm_file = file;
+			if (err) {
+				vma->vm_ops = &ovl_vm_ops;
+				return VM_FAULT_SIGBUS;
+			}
+			ovl_set_real_vmops(file, vma->vm_ops);
+			vma->vm_ops = &ovl_vm_ops;


call_mmap() will rewrite vma->vm_file and vma->vm_ops to upper layer's,
so here recover to overlay's in order to jump into this ovl_fault()
in other page-faults.


+		}
+
+		retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
+		if (retry_allowed)
+			vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;

here, we disallow retry in real ->fault because retry will unlock 
mmap_lock, touching vma after unlock is not safe behavior.


+		vma->vm_file = realfile;
+		ret = ovl_get_real_vmops(file)->fault(vmf);


calling real fault handler.


+		vma->vm_file = file;
+		if (retry_allowed)
+			vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;


recover vm_file and vm_ops to overlay's so that we can jump into
ovl_fault in other page-faults.


+		return ret;
+
+	} else {



Thanks,
cgxu




^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-09-01 13:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-29  9:50 [RFC PATCH 0/3] ovl: stacked mmap for shared map Chengguang Xu
2020-08-29  9:50 ` [RFC PATCH 1/3] mm: mmap: export necessary functions for overlayfs' mmap Chengguang Xu
2020-08-29  9:51 ` [RFC PATCH 2/3] ovl: introduce struct ovl_file_entry Chengguang Xu
2020-08-29 12:16   ` kernel test robot
2020-08-29 12:16   ` [RFC PATCH] ovl: ovl_get_realfile() can be static kernel test robot
2020-08-29  9:51 ` [RFC PATCH 3/3] ovl: implement stacked mmap for shared map Chengguang Xu
2020-08-29 11:35   ` kernel test robot
2020-08-30 11:33   ` Amir Goldstein
2020-08-30 11:33     ` Amir Goldstein
2020-08-31 13:47     ` cgxu
2020-08-31 15:51       ` Amir Goldstein
2020-08-31 15:51         ` Amir Goldstein
2020-09-01  7:44         ` Miklos Szeredi
2020-09-01  7:44           ` Miklos Szeredi
2020-09-01 13:20         ` cgxu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.