All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up
@ 2017-02-17 16:09 Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 1/9] vfs: extract common parts of {compat_,}do_readv_writev() Miklos Szeredi
                   ` (9 more replies)
  0 siblings, 10 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

A file is opened for read-only, opened read-write (resulting in a copy up)
and modified.  The data read back from the read-only fd will be stale
in this case (the read-only file descriptor still refers to the lower,
unmodified file).

This patchset fixes issues related to this corner case.  This is a
requirement from various parties for accepting overlayfs as a "POSIX"
filesystem.

When an operation (read, mmap, fsync) is done on an overlay fd opened
read-only that is referring to a lower file, check if it has been copied up
in the meantime.  If so, open the upper file and use that for the operation.

To make the performance impact minimal for non-overlay case, use a flag in
file->f_mode to indicate that this is an overlay file.

Git tree is here:

git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git #overlay-ro-rw-fix

Miklos Szeredi (9):
  vfs: extract common parts of {compat_,}do_readv_writev()
  vfs: pass type instead of fn to do_{loop,iter}_readv_writev()
  vfs: use helpers for calling f_op->{read,write}_iter()
  vfs: intercept reads to overlay files
  mm: ovl: copy-up on MAP_SHARED
  mm: use helper for calling f_op->mmap()
  ovl: intercept mmap on overlay files
  vfs: use helper for calling f_op->fsync()
  vfs: intercept fsync on overlay files

 drivers/block/loop.c                   |   4 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |   2 +-
 drivers/gpu/drm/vgem/vgem_drv.c        |   2 +-
 fs/Makefile                            |   2 +-
 fs/aio.c                               |   4 +-
 fs/coda/file.c                         |   2 +-
 fs/open.c                              |   2 +
 fs/overlay_util.c                      |  77 +++++++++++++++++++++
 fs/read_write.c                        | 121 +++++++++++++--------------------
 fs/splice.c                            |   2 +-
 fs/sync.c                              |   2 +-
 include/linux/fs.h                     |  42 ++++++++++++
 include/linux/overlay_util.h           |  17 +++++
 ipc/shm.c                              |   4 +-
 mm/mmap.c                              |   5 +-
 mm/nommu.c                             |   4 +-
 mm/util.c                              |  22 ++++++
 17 files changed, 227 insertions(+), 87 deletions(-)
 create mode 100644 fs/overlay_util.c
 create mode 100644 include/linux/overlay_util.h

-- 
2.5.5

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/9] vfs: extract common parts of {compat_,}do_readv_writev()
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 2/9] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Miklos Szeredi
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/read_write.c | 80 +++++++++++++++++++++------------------------------------
 1 file changed, 29 insertions(+), 51 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 5816d4c4cab0..b7cf7e2cb8c5 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -834,25 +834,15 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 	return ret;
 }
 
-static ssize_t do_readv_writev(int type, struct file *file,
-			       const struct iovec __user * uvector,
-			       unsigned long nr_segs, loff_t *pos,
-			       int flags)
+static ssize_t __do_readv_writev(int type, struct file *file,
+				 struct iov_iter *iter, loff_t *pos, int flags)
 {
 	size_t tot_len;
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t ret;
+	ssize_t ret = 0;
 	io_fn_t fn;
 	iter_fn_t iter_fn;
 
-	ret = import_iovec(type, uvector, nr_segs,
-			   ARRAY_SIZE(iovstack), &iov, &iter);
-	if (ret < 0)
-		return ret;
-
-	tot_len = iov_iter_count(&iter);
+	tot_len = iov_iter_count(iter);
 	if (!tot_len)
 		goto out;
 	ret = rw_verify_area(type, file, pos, tot_len);
@@ -869,15 +859,14 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	}
 
 	if (iter_fn)
-		ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
+		ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags);
 	else
-		ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
+		ret = do_loop_readv_writev(file, iter, pos, fn, flags);
 
 	if (type != READ)
 		file_end_write(file);
 
 out:
-	kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
 			fsnotify_access(file);
@@ -887,6 +876,27 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	return ret;
 }
 
+static ssize_t do_readv_writev(int type, struct file *file,
+			       const struct iovec __user *uvector,
+			       unsigned long nr_segs, loff_t *pos,
+			       int flags)
+{
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	ret = import_iovec(type, uvector, nr_segs,
+			   ARRAY_SIZE(iovstack), &iov, &iter);
+	if (ret < 0)
+		return ret;
+
+	ret = __do_readv_writev(type, file, &iter, pos, flags);
+	kfree(iov);
+
+	return ret;
+}
+
 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		  unsigned long vlen, loff_t *pos, int flags)
 {
@@ -1064,51 +1074,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 			       unsigned long nr_segs, loff_t *pos,
 			       int flags)
 {
-	compat_ssize_t tot_len;
 	struct iovec iovstack[UIO_FASTIOV];
 	struct iovec *iov = iovstack;
 	struct iov_iter iter;
 	ssize_t ret;
-	io_fn_t fn;
-	iter_fn_t iter_fn;
 
 	ret = compat_import_iovec(type, uvector, nr_segs,
 				  UIO_FASTIOV, &iov, &iter);
 	if (ret < 0)
 		return ret;
 
-	tot_len = iov_iter_count(&iter);
-	if (!tot_len)
-		goto out;
-	ret = rw_verify_area(type, file, pos, tot_len);
-	if (ret < 0)
-		goto out;
-
-	if (type == READ) {
-		fn = file->f_op->read;
-		iter_fn = file->f_op->read_iter;
-	} else {
-		fn = (io_fn_t)file->f_op->write;
-		iter_fn = file->f_op->write_iter;
-		file_start_write(file);
-	}
-
-	if (iter_fn)
-		ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
-	else
-		ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
-
-	if (type != READ)
-		file_end_write(file);
-
-out:
+	ret = __do_readv_writev(type, file, &iter, pos, flags);
 	kfree(iov);
-	if ((ret + (type == READ)) > 0) {
-		if (type == READ)
-			fsnotify_access(file);
-		else
-			fsnotify_modify(file);
-	}
+
 	return ret;
 }
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/9] vfs: pass type instead of fn to do_{loop,iter}_readv_writev()
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 1/9] vfs: extract common parts of {compat_,}do_readv_writev() Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 3/9] vfs: use helpers for calling f_op->{read,write}_iter() Miklos Szeredi
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/read_write.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index b7cf7e2cb8c5..c9da4492a462 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -23,9 +23,6 @@
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 
-typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
-
 const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
@@ -675,7 +672,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 EXPORT_SYMBOL(iov_shorten);
 
 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
-		loff_t *ppos, iter_fn_t fn, int flags)
+		loff_t *ppos, int type, int flags)
 {
 	struct kiocb kiocb;
 	ssize_t ret;
@@ -692,7 +689,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 		kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
 	kiocb.ki_pos = *ppos;
 
-	ret = fn(&kiocb, iter);
+	if (type == READ)
+		ret = filp->f_op->read_iter(&kiocb, iter);
+	else
+		ret = filp->f_op->write_iter(&kiocb, iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
@@ -700,7 +700,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 
 /* Do it by hand, with file-ops */
 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
-		loff_t *ppos, io_fn_t fn, int flags)
+		loff_t *ppos, int type, int flags)
 {
 	ssize_t ret = 0;
 
@@ -711,7 +711,13 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 		struct iovec iovec = iov_iter_iovec(iter);
 		ssize_t nr;
 
-		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
+		if (type == READ) {
+			nr = filp->f_op->read(filp, iovec.iov_base,
+					      iovec.iov_len, ppos);
+		} else {
+			nr = filp->f_op->write(filp, iovec.iov_base,
+					       iovec.iov_len, ppos);
+		}
 
 		if (nr < 0) {
 			if (!ret)
@@ -839,8 +845,6 @@ static ssize_t __do_readv_writev(int type, struct file *file,
 {
 	size_t tot_len;
 	ssize_t ret = 0;
-	io_fn_t fn;
-	iter_fn_t iter_fn;
 
 	tot_len = iov_iter_count(iter);
 	if (!tot_len)
@@ -849,19 +853,14 @@ static ssize_t __do_readv_writev(int type, struct file *file,
 	if (ret < 0)
 		goto out;
 
-	if (type == READ) {
-		fn = file->f_op->read;
-		iter_fn = file->f_op->read_iter;
-	} else {
-		fn = (io_fn_t)file->f_op->write;
-		iter_fn = file->f_op->write_iter;
+	if (type != READ)
 		file_start_write(file);
-	}
 
-	if (iter_fn)
-		ret = do_iter_readv_writev(file, iter, pos, iter_fn, flags);
+	if ((type == READ && file->f_op->read_iter) ||
+	    (type == WRITE && file->f_op->write_iter))
+		ret = do_iter_readv_writev(file, iter, pos, type, flags);
 	else
-		ret = do_loop_readv_writev(file, iter, pos, fn, flags);
+		ret = do_loop_readv_writev(file, iter, pos, type, flags);
 
 	if (type != READ)
 		file_end_write(file);
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/9] vfs: use helpers for calling f_op->{read,write}_iter()
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 1/9] vfs: extract common parts of {compat_,}do_readv_writev() Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 2/9] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 4/9] vfs: intercept reads to overlay files Miklos Szeredi
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 drivers/block/loop.c |  4 ++--
 fs/aio.c             |  4 ++--
 fs/read_write.c      | 12 ++++++------
 fs/splice.c          |  2 +-
 include/linux/fs.h   | 12 ++++++++++++
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f347285c67ec..2cf2903a0715 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -501,9 +501,9 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	cmd->iocb.ki_flags = IOCB_DIRECT;
 
 	if (rw == WRITE)
-		ret = file->f_op->write_iter(&cmd->iocb, &iter);
+		ret = call_write_iter(file, &cmd->iocb, &iter);
 	else
-		ret = file->f_op->read_iter(&cmd->iocb, &iter);
+		ret = call_read_iter(file, &cmd->iocb, &iter);
 
 	if (ret != -EIOCBQUEUED)
 		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
diff --git a/fs/aio.c b/fs/aio.c
index 873b4ca82ccb..63e97f3168e7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1495,7 +1495,7 @@ static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 		return ret;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
-		ret = aio_ret(req, file->f_op->read_iter(req, &iter));
+		ret = aio_ret(req, call_read_iter(file, req, &iter));
 	kfree(iovec);
 	return ret;
 }
@@ -1520,7 +1520,7 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 	if (!ret) {
 		req->ki_flags |= IOCB_WRITE;
 		file_start_write(file);
-		ret = aio_ret(req, file->f_op->write_iter(req, &iter));
+		ret = aio_ret(req, call_write_iter(file, req, &iter));
 		/*
 		 * We release freeze protection in aio_complete().  Fool lockdep
 		 * by telling it the lock got released so that it doesn't
diff --git a/fs/read_write.c b/fs/read_write.c
index c9da4492a462..0d697cc15717 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -367,7 +367,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
 	kiocb.ki_pos = *ppos;
 
 	iter->type |= READ;
-	ret = file->f_op->read_iter(&kiocb, iter);
+	ret = call_read_iter(file, &kiocb, iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
@@ -387,7 +387,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
 	kiocb.ki_pos = *ppos;
 
 	iter->type |= WRITE;
-	ret = file->f_op->write_iter(&kiocb, iter);
+	ret = call_write_iter(file, &kiocb, iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
@@ -436,7 +436,7 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
 	kiocb.ki_pos = *ppos;
 	iov_iter_init(&iter, READ, &iov, 1, len);
 
-	ret = filp->f_op->read_iter(&kiocb, &iter);
+	ret = call_read_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
@@ -493,7 +493,7 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
 	kiocb.ki_pos = *ppos;
 	iov_iter_init(&iter, WRITE, &iov, 1, len);
 
-	ret = filp->f_op->write_iter(&kiocb, &iter);
+	ret = call_write_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
@@ -690,9 +690,9 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
 	kiocb.ki_pos = *ppos;
 
 	if (type == READ)
-		ret = filp->f_op->read_iter(&kiocb, iter);
+		ret = call_read_iter(filp, &kiocb, iter);
 	else
-		ret = filp->f_op->write_iter(&kiocb, iter);
+		ret = call_write_iter(filp, &kiocb, iter);
 	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
diff --git a/fs/splice.c b/fs/splice.c
index 873d83104e79..6518f058bd7f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -306,7 +306,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 	idx = to.idx;
 	init_sync_kiocb(&kiocb, in);
 	kiocb.ki_pos = *ppos;
-	ret = in->f_op->read_iter(&kiocb, &to);
+	ret = call_read_iter(in, &kiocb, &to);
 	if (ret > 0) {
 		*ppos = kiocb.ki_pos;
 		file_accessed(in);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba074328894..efdaad954b70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1712,6 +1712,18 @@ struct inode_operations {
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
 
+static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
+				     struct iov_iter *iter)
+{
+	return file->f_op->read_iter(kio, iter);
+}
+
+static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
+				      struct iov_iter *iter)
+{
+	return file->f_op->write_iter(kio, iter);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/9] vfs: intercept reads to overlay files
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (2 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 3/9] vfs: use helpers for calling f_op->{read,write}_iter() Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-19  9:05   ` Al Viro
       [not found]   ` <D39694FF47DA2A43B120BF3DF6163E7A10CD2335@DGGEMA504-MBX.china.huawei.com>
  2017-02-17 16:09 ` [PATCH 5/9] mm: ovl: copy-up on MAP_SHARED Miklos Szeredi
                   ` (5 subsequent siblings)
  9 siblings, 2 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

...in order to handle the corner case when the file is copied up after
being opened read-only.

Can be verified with the following script:

 - 8< - - - - - 8< - - - - - 8< - - - - - 8< - - - -
cd /
rm -rf /tmp/ovl-rorw-test
mkdir /tmp/ovl-rorw-test
cd /tmp/ovl-rorw-test
mkdir -p mnt lower upper work
echo baba > lower/foo
mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work mnt
exec 3< mnt/foo
echo bubu > mnt/foo
cat <&3
exec 3>&-
umount mnt
 - 8< - - - - - 8< - - - - - 8< - - - - - 8< - - - -

Correct output is "bubu", incorrect output is "baba".

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/Makefile                  |  2 +-
 fs/open.c                    |  2 ++
 fs/overlay_util.c            | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h           | 13 +++++++++++++
 include/linux/overlay_util.h | 13 +++++++++++++
 5 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 fs/overlay_util.c
 create mode 100644 include/linux/overlay_util.h

diff --git a/fs/Makefile b/fs/Makefile
index 7bbaca9c67b1..8c8f197d7c75 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
+		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o overlay_util.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/open.c b/fs/open.c
index 9921f70bc5ca..4916ccff29f5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -762,6 +762,8 @@ static int do_dentry_open(struct file *f,
 	if ((f->f_mode & FMODE_WRITE) &&
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
+	if (unlikely(d_inode(f->f_path.dentry) != inode))
+		f->f_mode |= FMODE_OVERLAY;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
diff --git a/fs/overlay_util.c b/fs/overlay_util.c
new file mode 100644
index 000000000000..0daff19bad0b
--- /dev/null
+++ b/fs/overlay_util.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#if IS_ENABLED(CONFIG_OVERLAY_FS)
+
+#include <linux/overlay_util.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "internal.h"
+
+static bool overlay_file_consistent(struct file *file)
+{
+	return d_real_inode(file->f_path.dentry) == file_inode(file);
+}
+
+ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+			  struct iov_iter *iter)
+{
+	ssize_t ret;
+
+	if (likely(overlay_file_consistent(file)))
+		return file->f_op->read_iter(kio, iter);
+
+	file = filp_clone_open(file);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ret = vfs_iter_read(file, iter, &kio->ki_pos);
+	fput(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(overlay_read_iter);
+
+#endif /* IS_ENABLED(CONFIG_OVERLAY_FS) */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index efdaad954b70..d186d5390e99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/delayed_call.h>
+#include <linux/overlay_util.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -143,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
+/* File comes from overlay filesystem */
+#define FMODE_OVERLAY		((__force fmode_t)0x8000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
@@ -1712,9 +1716,18 @@ struct inode_operations {
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
 
+
+static inline bool is_overlay_file(struct file *file)
+{
+	return IS_ENABLED(CONFIG_OVERLAY_FS) && file->f_mode & FMODE_OVERLAY;
+}
+
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
 				     struct iov_iter *iter)
 {
+	if (unlikely(is_overlay_file(file)))
+		return overlay_read_iter(file, kio, iter);
+
 	return file->f_op->read_iter(kio, iter);
 }
 
diff --git a/include/linux/overlay_util.h b/include/linux/overlay_util.h
new file mode 100644
index 000000000000..886be9003bf3
--- /dev/null
+++ b/include/linux/overlay_util.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_OVERLAY_FS_H
+#define _LINUX_OVERLAY_FS_H
+
+#include <linux/types.h>
+
+struct file;
+struct kiocb;
+struct iov_iter;
+
+extern ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+				 struct iov_iter *iter);
+
+#endif /* _LINUX_OVERLAY_FS_H */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/9] mm: ovl: copy-up on MAP_SHARED
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (3 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 4/9] vfs: intercept reads to overlay files Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 6/9] mm: use helper for calling f_op->mmap() Miklos Szeredi
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

A corner case of a corner case is when

 - file opened for O_RDONLY
 - which is then memory mapped SHARED
 - file opened for O_WRONLY
 - contents modified
 - contents read back through the shared mapping

Unfortunately it looks very difficult to do anything about the established
shared map after the file is copied up.  Instead when a read-only file is
mapped shared overlayfs copies up the file before actually doing the map.
This may result in unnecessary copy-ups (but so may copy-up on open(O_RDWR)
for example).

We can revisit this later if it turns out to be a performance problem in
real life.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Amir Goldstein <amir73il@gmail.com>
---
 mm/util.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/mm/util.c b/mm/util.c
index 3cb2164f4099..6cea1c4f27f3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -300,6 +300,28 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
+		/*
+		 * Special treatment for overlayfs:
+		 *
+		 * Take MAP_SHARED/PROT_READ as hint about future writes to the
+		 * file (through another file descriptor).  Caller might not
+		 * have had such an intent, but we hope MAP_PRIVATE will be used
+		 * in most such cases.
+		 *
+		 * If we don't copy up now and the file is modified, it becomes
+		 * really difficult to change the mapping to match that of the
+		 * file's content later.
+		 *
+		 * Copy up needs to be done without mmap_sem since it takes vfs
+		 * locks which would potentially deadlock under mmap_sem.
+		 */
+		if ((flag & MAP_SHARED) && !(prot & PROT_WRITE) && file) {
+			void *p = d_real(file->f_path.dentry, NULL, O_WRONLY);
+
+			if (IS_ERR(p))
+				return PTR_ERR(p);
+		}
+
 		if (down_write_killable(&mm->mmap_sem))
 			return -EINTR;
 		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/9] mm: use helper for calling f_op->mmap()
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (4 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 5/9] mm: ovl: copy-up on MAP_SHARED Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 7/9] ovl: intercept mmap on overlay files Miklos Szeredi
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +-
 drivers/gpu/drm/vgem/vgem_drv.c        | 2 +-
 fs/coda/file.c                         | 2 +-
 include/linux/fs.h                     | 5 +++++
 ipc/shm.c                              | 2 +-
 mm/mmap.c                              | 2 +-
 mm/nommu.c                             | 4 ++--
 7 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 5e38299b5df6..84fc9f001576 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	if (!obj->base.filp)
 		return -ENODEV;
 
-	ret = obj->base.filp->f_op->mmap(obj->base.filp, vma);
+	ret = call_mmap(obj->base.filp, vma);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 477e07f0ecb6..9f7e222b3e89 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -287,7 +287,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,
 	if (!obj->filp)
 		return -ENODEV;
 
-	ret = obj->filp->f_op->mmap(obj->filp, vma);
+	ret = call_mmap(obj->filp, vma);
 	if (ret)
 		return ret;
 
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 6e0154eb6fcc..9d956cd6d46f 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -96,7 +96,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
 	cfi->cfi_mapcount++;
 	spin_unlock(&cii->c_lock);
 
-	return host_file->f_op->mmap(host_file, vma);
+	return call_mmap(host_file, vma);
 }
 
 int coda_open(struct inode *coda_inode, struct file *coda_file)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d186d5390e99..4728c5178f3f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1737,6 +1737,11 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 	return file->f_op->write_iter(kio, iter);
 }
 
+static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return file->f_op->mmap(file, vma);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
diff --git a/ipc/shm.c b/ipc/shm.c
index 81203e8ba013..4329fe3ef594 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -423,7 +423,7 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
-	ret = sfd->file->f_op->mmap(sfd->file, vma);
+	ret = call_mmap(sfd->file, vma);
 	if (ret) {
 		shm_close(vma);
 		return ret;
diff --git a/mm/mmap.c b/mm/mmap.c
index dc4291dcc99b..3714aa4e6f81 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1668,7 +1668,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * new file must not have been exposed to user-space, yet.
 		 */
 		vma->vm_file = get_file(file);
-		error = file->f_op->mmap(file, vma);
+		error = call_mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 24f9f5f39145..e366354f777d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1084,7 +1084,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
-	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+	ret = call_mmap(vma->vm_file, vma);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
@@ -1115,7 +1115,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 * - VM_MAYSHARE will be set if it may attempt to share
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
+		ret = call_mmap(vma->vm_file, vma);
 		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
 			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 7/9] ovl: intercept mmap on overlay files
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (5 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 6/9] mm: use helper for calling f_op->mmap() Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 8/9] vfs: use helper for calling f_op->fsync() Miklos Szeredi
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

... in order to handle the corner case when the file is copied up after
being opened read-only and mapped shared.

Can be verified with the following script:

 - 8< - - - - - 8< - - - - - 8< - - - - - 8< - - - -
cd /
rm -rf /tmp/ovl-rorw-test
mkdir /tmp/ovl-rorw-test
cd /tmp/ovl-rorw-test
cat << EOF > rorw-map.c
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <err.h>
#include <sys/mman.h>

int main(int argc, char *argv[])
{
        int rofd, rwfd;
        int ret;
        char buf[4];
        char *addr;

        rofd = open(argv[1], O_RDONLY);
        if (rofd == -1)
                err(1, "ro open");

        addr = mmap(NULL, 4, PROT_READ, MAP_SHARED, rofd, 0);
        if (addr == MAP_FAILED)
                err(1, "ro mmap");

        if (memcmp(addr, "bubu", 4) == 0)
                errx(1, "identical startup data");

        rwfd = open(argv[1], O_WRONLY);
        if (rwfd == -1)
                err(1, "rw open");

        ret = write(rwfd, "bubu", 4);
        if (ret == -1)
                err(1, "write");
        if (ret < 4)
                errx(1, "short write");

        if (memcmp(addr, "bubu", 4) != 0)
                errx(1, "bad mmap data");

        ret = read(rofd, buf, 4);
        if (ret == -1)
                err(1, "read");
        if (ret < 4)
                errx(1, "short read");
        if (memcmp(buf, "bubu", 4) != 0)
                errx(1, "bad read data");

        return 0;
}
EOF
gcc -o rorw-map rorw-map.c
mkdir -p mnt lower upper work
echo baba > lower/foo
mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work mnt
./rorw-map mnt/foo
umount mnt
 - 8< - - - - - 8< - - - - - 8< - - - - - 8< - - - -

No output means success, "rorw-map: bad mmap data" means failure.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlay_util.c            | 19 +++++++++++++++++++
 include/linux/fs.h           |  3 +++
 include/linux/overlay_util.h |  2 ++
 mm/mmap.c                    |  3 +++
 4 files changed, 27 insertions(+)

diff --git a/fs/overlay_util.c b/fs/overlay_util.c
index 0daff19bad0b..063f8c5719d1 100644
--- a/fs/overlay_util.c
+++ b/fs/overlay_util.c
@@ -10,6 +10,7 @@
 #include <linux/overlay_util.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/mm.h>
 #include "internal.h"
 
 static bool overlay_file_consistent(struct file *file)
@@ -36,4 +37,22 @@ ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
 }
 EXPORT_SYMBOL(overlay_read_iter);
 
+int overlay_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (unlikely(!overlay_file_consistent(file))) {
+		file = filp_clone_open(file);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		fput(vma->vm_file);
+		/* transfer ref: */
+		vma->vm_file = file;
+
+		if (!file->f_op->mmap)
+			return -ENODEV;
+	}
+	return file->f_op->mmap(file, vma);
+}
+EXPORT_SYMBOL(overlay_mmap);
+
 #endif /* IS_ENABLED(CONFIG_OVERLAY_FS) */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4728c5178f3f..6e74b726c3ca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1739,6 +1739,9 @@ static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
 
 static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	if (unlikely(is_overlay_file(file)))
+		return overlay_mmap(file, vma);
+
 	return file->f_op->mmap(file, vma);
 }
 
diff --git a/include/linux/overlay_util.h b/include/linux/overlay_util.h
index 886be9003bf3..2cd66bc316cc 100644
--- a/include/linux/overlay_util.h
+++ b/include/linux/overlay_util.h
@@ -6,8 +6,10 @@
 struct file;
 struct kiocb;
 struct iov_iter;
+struct vm_area_struct;
 
 extern ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
 				 struct iov_iter *iter);
+extern int overlay_mmap(struct file *file, struct vm_area_struct *vma);
 
 #endif /* _LINUX_OVERLAY_FS_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index 3714aa4e6f81..c43dfe1846d1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1746,6 +1746,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return error;
 }
 
+/*
+ * Overlay needs special handling after copy-up
+ */
 unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
 	/*
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 8/9] vfs: use helper for calling f_op->fsync()
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (6 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 7/9] ovl: intercept mmap on overlay files Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-17 16:09 ` [PATCH 9/9] vfs: intercept fsync on overlay files Miklos Szeredi
  2017-02-19  9:14 ` [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Al Viro
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/sync.c          | 2 +-
 include/linux/fs.h | 6 ++++++
 ipc/shm.c          | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/sync.c b/fs/sync.c
index 2a54c1f22035..11ba023434b1 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,7 +192,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 		spin_unlock(&inode->i_lock);
 		mark_inode_dirty_sync(inode);
 	}
-	return file->f_op->fsync(file, start, end, datasync);
+	return call_fsync(file, start, end, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync_range);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e74b726c3ca..1293005de2e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1745,6 +1745,12 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
+static inline int call_fsync(struct file *file, loff_t start, loff_t end,
+			     int datasync)
+{
+	return file->f_op->fsync(file, start, end, datasync);
+}
+
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
diff --git a/ipc/shm.c b/ipc/shm.c
index 4329fe3ef594..258aff2e03bb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -452,7 +452,7 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 	if (!sfd->file->f_op->fsync)
 		return -EINVAL;
-	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
+	return call_fsync(sfd->file, start, end, datasync);
 }
 
 static long shm_fallocate(struct file *file, int mode, loff_t offset,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 9/9] vfs: intercept fsync on overlay files
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (7 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 8/9] vfs: use helper for calling f_op->fsync() Miklos Szeredi
@ 2017-02-17 16:09 ` Miklos Szeredi
  2017-02-19  9:14 ` [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Al Viro
  9 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-17 16:09 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

Fsync is really an inode operation (AFAICS), so doing it on an O_RDONLY
file descriptor should flush any data written through an O_WRONLY file
descriptor, for example.

To make this work correctly in case the file is copied up after being
opened for read, intercept the fsync operation, similarly to read_iter and
mmap.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlay_util.c            | 19 +++++++++++++++++++
 include/linux/fs.h           |  3 +++
 include/linux/overlay_util.h |  2 ++
 3 files changed, 24 insertions(+)

diff --git a/fs/overlay_util.c b/fs/overlay_util.c
index 063f8c5719d1..dae9d23b1519 100644
--- a/fs/overlay_util.c
+++ b/fs/overlay_util.c
@@ -37,6 +37,25 @@ ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
 }
 EXPORT_SYMBOL(overlay_read_iter);
 
+int overlay_fsync(struct file *file, loff_t start, loff_t end,
+		  int datasync)
+{
+	int ret;
+
+	if (likely(overlay_file_consistent(file)))
+		return file->f_op->fsync(file, start, end, datasync);
+
+	file = filp_clone_open(file);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ret = vfs_fsync_range(file, start, end, datasync);
+	fput(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(overlay_fsync);
+
 int overlay_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	if (unlikely(!overlay_file_consistent(file))) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1293005de2e3..1cfa3b33659c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1748,6 +1748,9 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 static inline int call_fsync(struct file *file, loff_t start, loff_t end,
 			     int datasync)
 {
+	if (unlikely(is_overlay_file(file)))
+		return overlay_fsync(file, start, end, datasync);
+
 	return file->f_op->fsync(file, start, end, datasync);
 }
 
diff --git a/include/linux/overlay_util.h b/include/linux/overlay_util.h
index 2cd66bc316cc..06a0dd923404 100644
--- a/include/linux/overlay_util.h
+++ b/include/linux/overlay_util.h
@@ -11,5 +11,7 @@ struct vm_area_struct;
 extern ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
 				 struct iov_iter *iter);
 extern int overlay_mmap(struct file *file, struct vm_area_struct *vma);
+extern int overlay_fsync(struct file *file, loff_t start, loff_t end,
+			 int datasync);
 
 #endif /* _LINUX_OVERLAY_FS_H */
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/9] vfs: intercept reads to overlay files
  2017-02-17 16:09 ` [PATCH 4/9] vfs: intercept reads to overlay files Miklos Szeredi
@ 2017-02-19  9:05   ` Al Viro
  2017-02-19  9:24     ` Miklos Szeredi
       [not found]   ` <D39694FF47DA2A43B120BF3DF6163E7A10CD2335@DGGEMA504-MBX.china.huawei.com>
  1 sibling, 1 reply; 18+ messages in thread
From: Al Viro @ 2017-02-19  9:05 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

On Fri, Feb 17, 2017 at 05:09:33PM +0100, Miklos Szeredi wrote:
> ...in order to handle the corner case when the file is copyied up after
> being opened read-only.

> --- /dev/null
> +++ b/fs/overlay_util.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2017 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + */
> +#if IS_ENABLED(CONFIG_OVERLAY_FS)

This is crap - it should be handled in fs/Makefile, not with IS_ENABLED.

> +#include <linux/overlay_util.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include "internal.h"
> +
> +static bool overlay_file_consistent(struct file *file)
> +{
> +	return d_real_inode(file->f_path.dentry) == file_inode(file);
> +}
> +
> +ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
> +			  struct iov_iter *iter)
> +{
> +	ssize_t ret;
> +
> +	if (likely(overlay_file_consistent(file)))
> +		return file->f_op->read_iter(kio, iter);
> +
> +	file = filp_clone_open(file);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	ret = vfs_iter_read(file, iter, &kio->ki_pos);
> +	fput(file);

You do realize that a bunch of such calls will breed arseloads of struct file,
right?  Freeing is delayed...

> +static inline bool is_overlay_file(struct file *file)
> +{
> +	return IS_ENABLED(CONFIG_OVERLAY_FS) && file->f_mode & FMODE_OVERLAY;
> +}
> +
>  static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
>  				     struct iov_iter *iter)
>  {
> +	if (unlikely(is_overlay_file(file)))
> +		return overlay_read_iter(file, kio, iter);
> +
>  	return file->f_op->read_iter(kio, iter);
>  }

1) that IS_ENABLED is fairly pointless and it's not obvious that nobody
else will use that flag

2) what that check should include is overlay_file_consistent(), with
no method call in overlay_read_iter().

3) anything that does a plenty of calls of kernel_read() is going to be
very unpleasantly surprised by the effects of that thing.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up
  2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
                   ` (8 preceding siblings ...)
  2017-02-17 16:09 ` [PATCH 9/9] vfs: intercept fsync on overlay files Miklos Szeredi
@ 2017-02-19  9:14 ` Al Viro
  2017-02-20 15:16   ` Miklos Szeredi
  2017-03-07 16:26   ` Miklos Szeredi
  9 siblings, 2 replies; 18+ messages in thread
From: Al Viro @ 2017-02-19  9:14 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, linux-kernel, linux-fsdevel

On Fri, Feb 17, 2017 at 05:09:29PM +0100, Miklos Szeredi wrote:
> A file is opened for read-only, opened read-write (resulting in a copy up)
> and modified.  The data read back from the the read-only fd will be stale
> in this case (the read-only file descriptor still refers to the lower,
> unmodified file).
> 
> This patchset fixes issues related to this corner case.  This is a
> requirement from various parties for accepting overlayfs as a "POSIX"
> filesystem.
> 
> When an operation (read, mmap, fsync) is done on an overlay fd opened
> read-only that is referring to a lower file, check if it has been copied up
> in the mean time.  If so, open the upper file and use that for the operation.
> 
> To make the performance impact minimal for non-overlay case, use a flag in
> file->f_mode to indicate that this is an overlay file.

This is one hell of a DoS vector - it's really easy to eat tons of struct
file that way.  Preparatory parts of that series make sense on their own,
but your "let's allocate a struct file, call ->open() and schedule that
struct file for closing upon the exit to userland on each kernel_read()"
is not.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/9] vfs: intercept reads to overlay files
  2017-02-19  9:05   ` Al Viro
@ 2017-02-19  9:24     ` Miklos Szeredi
  0 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-19  9:24 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-unionfs, lkml, linux-fsdevel

On Sun, Feb 19, 2017 at 10:05 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Fri, Feb 17, 2017 at 05:09:33PM +0100, Miklos Szeredi wrote:
>> ...in order to handle the corner case when the file is copyied up after
>> being opened read-only.
>
>> --- /dev/null
>> +++ b/fs/overlay_util.c
>> @@ -0,0 +1,39 @@
>> +/*
>> + * Copyright (C) 2017 Red Hat, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + */
>> +#if IS_ENABLED(CONFIG_OVERLAY_FS)
>
> This is crap - it should be handled in fs/Makefile, not with IS_ENABLED.

This is needed if overlay is built in or a module.  Couldn't figure
out how makefile could handle that.

>
>> +#include <linux/overlay_util.h>
>> +#include <linux/fs.h>
>> +#include <linux/file.h>
>> +#include "internal.h"
>> +
>> +static bool overlay_file_consistent(struct file *file)
>> +{
>> +     return d_real_inode(file->f_path.dentry) == file_inode(file);
>> +}
>> +
>> +ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
>> +                       struct iov_iter *iter)
>> +{
>> +     ssize_t ret;
>> +
>> +     if (likely(overlay_file_consistent(file)))
>> +             return file->f_op->read_iter(kio, iter);
>> +
>> +     file = filp_clone_open(file);
>> +     if (IS_ERR(file))
>> +             return PTR_ERR(file);
>> +
>> +     ret = vfs_iter_read(file, iter, &kio->ki_pos);
>> +     fput(file);
>
> You do realize that a bunch of such calls will breed arseloads of struct file,
> right?  Freeing is delayed...

No, I hadn't realized that.  Could we force freeing file here?

>
>> +static inline bool is_overlay_file(struct file *file)
>> +{
>> +     return IS_ENABLED(CONFIG_OVERLAY_FS) && file->f_mode & FMODE_OVERLAY;
>> +}
>> +
>>  static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
>>                                    struct iov_iter *iter)
>>  {
>> +     if (unlikely(is_overlay_file(file)))
>> +             return overlay_read_iter(file, kio, iter);
>> +
>>       return file->f_op->read_iter(kio, iter);
>>  }
>
> 1) that IS_ENABLED is fairly pointless and it's not obvious that nobody
> else will use that flag

It's meant to be a micro-optimization for the CONFIG_OVERLAY_FS=n case.

>
> 2) what that check should include is overlay_file_consistent(), with
> no method call in overlay_read_iter().

This is again a micro-optimization for the case when this is not an
overlay file.  Which is the very very likely case.

What's the problem with putting that test in the non-inline function?

>
> 3) anything that does a plenty of calls of kernel_read() is going to be
> very unpleasantly surprised by the effects of that thing.

Why is that?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/9] vfs: intercept reads to overlay files
       [not found]   ` <D39694FF47DA2A43B120BF3DF6163E7A10CD2335@DGGEMA504-MBX.china.huawei.com>
@ 2017-02-20  7:47       ` zhangyi (F)
  0 siblings, 0 replies; 18+ messages in thread
From: zhangyi (F) @ 2017-02-20  7:47 UTC (permalink / raw)
  To: mszeredi; +Cc: linux-unionfs, linux-kernel, linux-fsdevel, Al Viro, miaoxie

On 2017/2/18 00:10, Miklos Szeredi wrote:
> diff --git a/fs/open.c b/fs/open.c
> index 9921f70bc5ca..4916ccff29f5 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -762,6 +762,8 @@ static int do_dentry_open(struct file *f,
>  	if ((f->f_mode & FMODE_WRITE) &&
>  	     likely(f->f_op->write || f->f_op->write_iter))
>  		f->f_mode |= FMODE_CAN_WRITE;
> +	if (unlikely(d_inode(f->f_path.dentry) != inode))
> +		f->f_mode |= FMODE_OVERLAY;

Can we add the flag only to "readonly && not copied-up" files, rather than to all overlayfs files?
Because we only need to check read-only files that have not yet been copied up.

>  	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
>  
> diff --git a/fs/overlay_util.c b/fs/overlay_util.c new file mode 100644 index 000000000000..0daff19bad0b
> --- /dev/null
> +++ b/fs/overlay_util.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2017 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify 
> +it
> + * under the terms of the GNU General Public License version 2 as 
> +published by
> + * the Free Software Foundation.
> + */
> +#if IS_ENABLED(CONFIG_OVERLAY_FS)
> +
> +#include <linux/overlay_util.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include "internal.h"
> +
> +static bool overlay_file_consistent(struct file *file) {
> +	return d_real_inode(file->f_path.dentry) == file_inode(file); }
> +
> +ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
> +			  struct iov_iter *iter)
> +{
> +	ssize_t ret;
> +
> +	if (likely(overlay_file_consistent(file)))
> +		return file->f_op->read_iter(kio, iter);
> +
> +	file = filp_clone_open(file);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	ret = vfs_iter_read(file, iter, &kio->ki_pos);
> +	fput(file);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(overlay_read_iter);

Can we try to replace the old file with the new file, and then clear the f_mode flag we added?
If so, we can avoid opening file for each reading call and avoid copied file consistency check.

Thanks.

zhangyi

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/9] vfs: intercept reads to overlay files
@ 2017-02-20  7:47       ` zhangyi (F)
  0 siblings, 0 replies; 18+ messages in thread
From: zhangyi (F) @ 2017-02-20  7:47 UTC (permalink / raw)
  To: mszeredi; +Cc: linux-unionfs, linux-kernel, linux-fsdevel, Al Viro, miaoxie

On 2017/2/18 00:10, Miklos Szeredi wrote:
> diff --git a/fs/open.c b/fs/open.c
> index 9921f70bc5ca..4916ccff29f5 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -762,6 +762,8 @@ static int do_dentry_open(struct file *f,
>  	if ((f->f_mode & FMODE_WRITE) &&
>  	     likely(f->f_op->write || f->f_op->write_iter))
>  		f->f_mode |= FMODE_CAN_WRITE;
> +	if (unlikely(d_inode(f->f_path.dentry) != inode))
> +		f->f_mode |= FMODE_OVERLAY;

Can we add the flag only to "readonly && not copied-up" files, rather than to all overlayfs files?
Because we only need to check read-only files that have not yet been copied up.

>  	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
>  
> diff --git a/fs/overlay_util.c b/fs/overlay_util.c new file mode 100644 index 000000000000..0daff19bad0b
> --- /dev/null
> +++ b/fs/overlay_util.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2017 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify 
> +it
> + * under the terms of the GNU General Public License version 2 as 
> +published by
> + * the Free Software Foundation.
> + */
> +#if IS_ENABLED(CONFIG_OVERLAY_FS)
> +
> +#include <linux/overlay_util.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include "internal.h"
> +
> +static bool overlay_file_consistent(struct file *file) {
> +	return d_real_inode(file->f_path.dentry) == file_inode(file); }
> +
> +ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
> +			  struct iov_iter *iter)
> +{
> +	ssize_t ret;
> +
> +	if (likely(overlay_file_consistent(file)))
> +		return file->f_op->read_iter(kio, iter);
> +
> +	file = filp_clone_open(file);
> +	if (IS_ERR(file))
> +		return PTR_ERR(file);
> +
> +	ret = vfs_iter_read(file, iter, &kio->ki_pos);
> +	fput(file);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(overlay_read_iter);

Can we try to replace the old file with the new file, and then clear the f_mode flag we added?
If so, we can avoid opening file for each reading call and avoid copied file consistency check.

Thanks.

zhangyi

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/9] vfs: intercept reads to overlay files
  2017-02-20  7:47       ` zhangyi (F)
  (?)
@ 2017-02-20  8:52       ` Miklos Szeredi
  -1 siblings, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-20  8:52 UTC (permalink / raw)
  To: zhangyi (F)
  Cc: Miklos Szeredi, linux-unionfs, linux-kernel, linux-fsdevel,
	Al Viro, miaoxie

On Mon, Feb 20, 2017 at 8:47 AM, zhangyi (F) <yi.zhang@huawei.com> wrote:
> On 2017/2/18 00:10, Miklos Szeredi wrote:
>> diff --git a/fs/open.c b/fs/open.c
>> index 9921f70bc5ca..4916ccff29f5 100644
>> --- a/fs/open.c
>> +++ b/fs/open.c
>> @@ -762,6 +762,8 @@ static int do_dentry_open(struct file *f,
>>       if ((f->f_mode & FMODE_WRITE) &&
>>            likely(f->f_op->write || f->f_op->write_iter))
>>               f->f_mode |= FMODE_CAN_WRITE;
>> +     if (unlikely(d_inode(f->f_path.dentry) != inode))
>> +             f->f_mode |= FMODE_OVERLAY;
>
> Can we just add flag to the "readonly && not copied" file, not all overlayfs files?
> Beacuse we just want to check the ro file before copied-up.

Can't do it without adding new infrastructure.  Likely not worth it.

>
>>       f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
>>
>> diff --git a/fs/overlay_util.c b/fs/overlay_util.c new file mode 100644 index 000000000000..0daff19bad0b
>> --- /dev/null
>> +++ b/fs/overlay_util.c
>> @@ -0,0 +1,39 @@
>> +/*
>> + * Copyright (C) 2017 Red Hat, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> +it
>> + * under the terms of the GNU General Public License version 2 as
>> +published by
>> + * the Free Software Foundation.
>> + */
>> +#if IS_ENABLED(CONFIG_OVERLAY_FS)
>> +
>> +#include <linux/overlay_util.h>
>> +#include <linux/fs.h>
>> +#include <linux/file.h>
>> +#include "internal.h"
>> +
>> +static bool overlay_file_consistent(struct file *file) {
>> +     return d_real_inode(file->f_path.dentry) == file_inode(file); }
>> +
>> +ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
>> +                       struct iov_iter *iter)
>> +{
>> +     ssize_t ret;
>> +
>> +     if (likely(overlay_file_consistent(file)))
>> +             return file->f_op->read_iter(kio, iter);
>> +
>> +     file = filp_clone_open(file);
>> +     if (IS_ERR(file))
>> +             return PTR_ERR(file);
>> +
>> +     ret = vfs_iter_read(file, iter, &kio->ki_pos);
>> +     fput(file);
>> +
>> +     return ret;
>> +}
>> +EXPORT_SYMBOL(overlay_read_iter);
>
> Can we try to replace the old file with the new file, and then clear the f_mode flag we added?
> If so, we can avoid opening file for each reading call and avoid copied file consistency check.

Could probably try replacing file in fd table.  But no point in doing
so without having an actual real life use case that would benefit from
that.  AFAIK there's no such thing.

And there are other pointers to file that can't be replaced, so this
fallback would need to be kept around.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up
  2017-02-19  9:14 ` [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Al Viro
@ 2017-02-20 15:16   ` Miklos Szeredi
  2017-03-07 16:26   ` Miklos Szeredi
  1 sibling, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-02-20 15:16 UTC (permalink / raw)
  To: Al Viro
  Cc: Miklos Szeredi, linux-unionfs, linux-kernel, linux-fsdevel,
	David Howells, J. Bruce Fields, Jeff Layton

On Sun, Feb 19, 2017 at 10:14 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:

> This is one hell of a DoS vector - it's really easy to eat tons of struct
> file that way.  Preparatory parts of that series make sense on their own,
> but your "let's allocate a struct file, call ->open() and schedule that
> struct file for closing upon the exit to userland on each kernel_read()"
> is not.

Found a couple of instances of this pattern (haven't looked very hard,
possibly there's more):

nfsd_read()
cachefiles_write_page()

How come this hasn't been a problem for them?

Would flush_delayed_fput() work here?  I couldn't really find what the
locking issues with synchronous fputs were.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up
  2017-02-19  9:14 ` [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Al Viro
  2017-02-20 15:16   ` Miklos Szeredi
@ 2017-03-07 16:26   ` Miklos Szeredi
  1 sibling, 0 replies; 18+ messages in thread
From: Miklos Szeredi @ 2017-03-07 16:26 UTC (permalink / raw)
  To: Al Viro; +Cc: Miklos Szeredi, linux-unionfs, linux-kernel, linux-fsdevel

On Sun, Feb 19, 2017 at 09:14:41AM +0000, Al Viro wrote:
> On Fri, Feb 17, 2017 at 05:09:29PM +0100, Miklos Szeredi wrote:
> > A file is opened for read-only, opened read-write (resulting in a copy up)
> > and modified.  The data read back from the the read-only fd will be stale
> > in this case (the read-only file descriptor still refers to the lower,
> > unmodified file).
> > 
> > This patchset fixes issues related to this corner case.  This is a
> > requirement from various parties for accepting overlayfs as a "POSIX"
> > filesystem.
> > 
> > When an operation (read, mmap, fsync) is done on an overlay fd opened
> > read-only that is referring to a lower file, check if it has been copied up
> > in the mean time.  If so, open the upper file and use that for the operation.
> > 
> > To make the performance impact minimal for non-overlay case, use a flag in
> > file->f_mode to indicate that this is an overlay file.
> 
> This is one hell of a DoS vector - it's really easy to eat tons of struct
> file that way.  Preparatory parts of that series make sense on their own,
> but your "let's allocate a struct file, call ->open() and schedule that
> struct file for closing upon the exit to userland on each kernel_read()"
> is not.

How about this?  Do you see a problem with calling __fput() synchronously here?

Thanks,
Miklos

---
 fs/Makefile                  |    2 -
 fs/file_table.c              |    2 -
 fs/internal.h                |    1 
 fs/overlay_util.c            |   53 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h           |   11 ++++++++
 include/linux/overlay_util.h |   13 ++++++++++
 6 files changed, 80 insertions(+), 2 deletions(-)

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/delayed_call.h>
+#include <linux/overlay_util.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1721,9 +1722,19 @@ struct inode_operations {
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
 
+
+static inline bool overlay_file_inconsistent(struct file *file)
+{
+	return unlikely(file->f_path.dentry->d_flags & DCACHE_OP_REAL) &&
+		unlikely(d_real_inode(file->f_path.dentry) != file_inode(file));
+}
+
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
 				     struct iov_iter *iter)
 {
+	if (overlay_file_inconsistent(file))
+		return overlay_read_iter(file, kio, iter);
+
 	return file->f_op->read_iter(kio, iter);
 }
 
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y :=	open.o read_write.o file_table.
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o \
-		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
+		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o overlay_util.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
--- /dev/null
+++ b/fs/overlay_util.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/overlay_util.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include "internal.h"
+
+static struct file *overlay_clone_file(struct file *file)
+{
+	file = filp_clone_open(file);
+	if (!IS_ERR(file))
+		file->f_mode |= FMODE_NONOTIFY;
+
+	return file;
+}
+
+/*
+ * Do the release synchronously.  Otherwise we'd have a DoS problem when doing
+ * multiple reads (e.g. through kernel_read()) and only releasing the cloned
+ * files when returning to userspace.
+ *
+ * There's no risk of final dput or final mntput happening, since caller holds
+ * ref to both through the original file.
+ */
+static void overlay_put_cloned_file(struct file *file)
+{
+	if (WARN_ON(!atomic_long_dec_and_test(&file->f_count)))
+		return;
+
+	__fput(file);
+}
+
+ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+			  struct iov_iter *iter)
+{
+	ssize_t ret;
+
+	file = overlay_clone_file(file);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ret = vfs_iter_read(file, iter, &kio->ki_pos);
+	overlay_put_cloned_file(file);
+
+	return ret;
+}
+EXPORT_SYMBOL(overlay_read_iter);
--- /dev/null
+++ b/include/linux/overlay_util.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_OVERLAY_FS_H
+#define _LINUX_OVERLAY_FS_H
+
+#include <linux/types.h>
+
+struct file;
+struct kiocb;
+struct iov_iter;
+
+extern ssize_t overlay_read_iter(struct file *file, struct kiocb *kio,
+				 struct iov_iter *iter);
+
+#endif /* _LINUX_OVERLAY_FS_H */
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -184,7 +184,7 @@ EXPORT_SYMBOL(alloc_file);
 
 /* the real guts of fput() - releasing the last reference to file
  */
-static void __fput(struct file *file)
+void __fput(struct file *file)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct vfsmount *mnt = file->f_path.mnt;
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -83,6 +83,7 @@ extern void chroot_fs_refs(const struct
  * file_table.c
  */
 extern struct file *get_empty_filp(void);
+extern void __fput(struct file *);
 
 /*
  * super.c

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-03-07 16:26 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-17 16:09 [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Miklos Szeredi
2017-02-17 16:09 ` [PATCH 1/9] vfs: extract common parts of {compat_,}do_readv_writev() Miklos Szeredi
2017-02-17 16:09 ` [PATCH 2/9] vfs: pass type instead of fn to do_{loop,iter}_readv_writev() Miklos Szeredi
2017-02-17 16:09 ` [PATCH 3/9] vfs: use helpers for calling f_op->{read,write}_iter() Miklos Szeredi
2017-02-17 16:09 ` [PATCH 4/9] vfs: intercept reads to overlay files Miklos Szeredi
2017-02-19  9:05   ` Al Viro
2017-02-19  9:24     ` Miklos Szeredi
     [not found]   ` <D39694FF47DA2A43B120BF3DF6163E7A10CD2335@DGGEMA504-MBX.china.huawei.com>
2017-02-20  7:47     ` zhangyi (F)
2017-02-20  7:47       ` zhangyi (F)
2017-02-20  8:52       ` Miklos Szeredi
2017-02-17 16:09 ` [PATCH 5/9] mm: ovl: copy-up on MAP_SHARED Miklos Szeredi
2017-02-17 16:09 ` [PATCH 6/9] mm: use helper for calling f_op->mmap() Miklos Szeredi
2017-02-17 16:09 ` [PATCH 7/9] ovl: intercept mmap on overlay files Miklos Szeredi
2017-02-17 16:09 ` [PATCH 8/9] vfs: use helper for calling f_op->fsync() Miklos Szeredi
2017-02-17 16:09 ` [PATCH 9/9] vfs: intercept fsync on overlay files Miklos Szeredi
2017-02-19  9:14 ` [PATCH 0/9] overlay: fix inconsistency of ro file after copy-up Al Viro
2017-02-20 15:16   ` Miklos Szeredi
2017-03-07 16:26   ` Miklos Szeredi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.