linux-unionfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] ovl: copy-up on MAP_SHARED
@ 2020-02-10  3:10 Chengguang Xu
  2020-02-13 19:12 ` Miklos Szeredi
  0 siblings, 1 reply; 4+ messages in thread
From: Chengguang Xu @ 2020-02-10  3:10 UTC (permalink / raw)
  To: miklos; +Cc: linux-unionfs, Chengguang Xu

When a read-only file is mapped shared, copy up the file before
actually doing the map so that we can keep data consistency in
O_RDONLY/O_WRONLY combination of shared mapping.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
 fs/overlayfs/Kconfig     | 21 ++++++++++++++++
 fs/overlayfs/file.c      | 54 ++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/overlayfs.h |  6 +++++
 fs/overlayfs/ovl_entry.h |  1 +
 fs/overlayfs/super.c     | 22 ++++++++++++++++
 5 files changed, 104 insertions(+)

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 444e2da4f60e..e9ce3010d5c7 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -123,3 +123,24 @@ config OVERLAY_FS_METACOPY
 	  that doesn't support this feature will have unexpected results.
 
 	  If unsure, say N.
+
+config OVERLAY_FS_COPY_UP_SHARED
+	bool "Overlayfs: copy up when mapping a file shared"
+	default n
+	depends on OVERLAY_FS
+	help
+	  If this option is enabled then on mapping a file with MAP_SHARED
+	  overlayfs copies up the file in anticipation of it being modified (just
+	  like we copy up the file on O_WRONLY and O_RDWR in anticipation of
+	  modification).  This does not interfere with shared library loading, as
+	  that uses MAP_PRIVATE.  But there might be use cases out there where
+	  this impacts performance and disk usage.
+
+	  This just selects the default, the feature can also be enabled or
+	  disabled in the running kernel or individually on each overlay mount.
+
+	  To get maximally standard compliant behavior, enable this option.
+
+	  To get a maximally backward compatible kernel, disable this option.
+
+	  If unsure, say N.
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index a5317216de73..69d4636d79ad 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -12,6 +12,7 @@
 #include <linux/splice.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/mman.h>
 #include "overlayfs.h"
 
 struct ovl_aio_req {
@@ -429,12 +430,65 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+struct ovl_copy_up_work {
+	struct work_struct work;
+	struct dentry *dentry;
+	unsigned long flags;
+	int err;
+};
+
+enum ovl_copy_up_work_flag {
+	OVL_COPY_UP_PENDING,
+};
+
+static void ovl_copy_up_work_fn(struct work_struct *work)
+{
+	struct ovl_copy_up_work *ovl_cuw;
+
+	ovl_cuw = container_of(work, struct ovl_copy_up_work, work);
+	ovl_cuw->err = ovl_copy_up(ovl_cuw->dentry);
+
+	clear_bit(OVL_COPY_UP_PENDING, &ovl_cuw->flags);
+	wake_up_bit(&ovl_cuw->flags, OVL_COPY_UP_PENDING);
+}
+
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = file->private_data;
 	const struct cred *old_cred;
+	struct inode *inode = file->f_inode;
+	struct ovl_copy_up_work ovl_cuw;
+	DEFINE_WAIT_BIT(wait, &ovl_cuw.flags, OVL_COPY_UP_PENDING);
+	wait_queue_head_t *wqh;
 	int ret;
 
+	if (vma->vm_flags & MAP_SHARED &&
+			ovl_copy_up_shared(file_inode(file)->i_sb)) {
+		ovl_cuw.err = 0;
+		ovl_cuw.flags = 0;
+		ovl_cuw.dentry = file_dentry(file);
+		set_bit(OVL_COPY_UP_PENDING, &ovl_cuw.flags);
+
+		wqh = bit_waitqueue(&ovl_cuw.flags, OVL_COPY_UP_PENDING);
+		prepare_to_wait(wqh, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+
+		INIT_WORK(&ovl_cuw.work, ovl_copy_up_work_fn);
+		schedule_work(&ovl_cuw.work);
+
+		schedule();
+		finish_wait(wqh, &wait.wq_entry);
+
+		if (ovl_cuw.err)
+			return ovl_cuw.err;
+
+		realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+		if (IS_ERR(realfile))
+			return PTR_ERR(realfile);
+
+		ovl_release(inode, file);
+		file->private_data = realfile;
+	}
+
 	if (!realfile->f_op->mmap)
 		return -ENODEV;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 3623d28aa4fa..28853c18d59c 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -328,6 +328,12 @@ static inline void ovl_inode_unlock(struct inode *inode)
 	mutex_unlock(&OVL_I(inode)->lock);
 }
 
+static inline bool ovl_copy_up_shared(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return !(sb->s_flags & SB_RDONLY) && ofs->config.copy_up_shared;
+}
 
 /* namei.c */
 int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 89015ea822e7..6007cafd2ac7 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@ struct ovl_config {
 	bool nfs_export;
 	int xino;
 	bool metacopy;
+	bool copy_up_shared;
 };
 
 struct ovl_sb {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 319fe0d355b0..35ed1aef3266 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -53,6 +53,12 @@ module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
 MODULE_PARM_DESC(xino_auto,
 		 "Auto enable xino feature");
 
+static bool ovl_copy_up_shared_def =
+	IS_ENABLED(CONFIG_OVERLAY_FS_COPY_UP_SHARED);
+module_param_named(copy_up_shared, ovl_copy_up_shared_def, bool, 0644);
+MODULE_PARM_DESC(copy_up_shared,
+		 "Copy up when mapping a file shared");
+
 static void ovl_entry_stack_free(struct ovl_entry *oe)
 {
 	unsigned int i;
@@ -363,6 +369,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s",
 			   ofs->config.metacopy ? "on" : "off");
+	if (ofs->config.copy_up_shared != ovl_copy_up_shared_def)
+		seq_printf(m, ",copy_up_shared=%s",
+				ofs->config.copy_up_shared ? "on" : "off");
 	return 0;
 }
 
@@ -403,6 +412,8 @@ enum {
 	OPT_XINO_AUTO,
 	OPT_METACOPY_ON,
 	OPT_METACOPY_OFF,
+	OPT_COPY_UP_SHARED_ON,
+	OPT_COPY_UP_SHARED_OFF,
 	OPT_ERR,
 };
 
@@ -421,6 +432,8 @@ static const match_table_t ovl_tokens = {
 	{OPT_XINO_AUTO,			"xino=auto"},
 	{OPT_METACOPY_ON,		"metacopy=on"},
 	{OPT_METACOPY_OFF,		"metacopy=off"},
+	{OPT_COPY_UP_SHARED_ON,		"copy_up_shared=on"},
+	{OPT_COPY_UP_SHARED_OFF,	"copy_up_shared=off"},
 	{OPT_ERR,			NULL}
 };
 
@@ -559,6 +572,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->metacopy = false;
 			break;
 
+		case OPT_COPY_UP_SHARED_ON:
+			config->copy_up_shared = true;
+			break;
+
+		case OPT_COPY_UP_SHARED_OFF:
+			config->copy_up_shared = false;
+			break;
+
 		default:
 			pr_err("unrecognized mount option \"%s\" or missing value\n",
 					p);
@@ -1609,6 +1630,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	ofs->config.nfs_export = ovl_nfs_export_def;
 	ofs->config.xino = ovl_xino_def();
 	ofs->config.metacopy = ovl_metacopy_def;
+	ofs->config.copy_up_shared = ovl_copy_up_shared_def;
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
 		goto out_err;
-- 
2.21.1




^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] ovl: copy-up on MAP_SHARED
  2020-02-10  3:10 [RFC PATCH] ovl: copy-up on MAP_SHARED Chengguang Xu
@ 2020-02-13 19:12 ` Miklos Szeredi
  2020-02-13 21:28   ` Amir Goldstein
  0 siblings, 1 reply; 4+ messages in thread
From: Miklos Szeredi @ 2020-02-13 19:12 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: overlayfs, Amir Goldstein

On Mon, Feb 10, 2020 at 4:11 AM Chengguang Xu <cgxu519@mykernel.net> wrote:

>  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
>  {
>         struct file *realfile = file->private_data;
>         const struct cred *old_cred;
> +       struct inode *inode = file->f_inode;
> +       struct ovl_copy_up_work ovl_cuw;
> +       DEFINE_WAIT_BIT(wait, &ovl_cuw.flags, OVL_COPY_UP_PENDING);
> +       wait_queue_head_t *wqh;
>         int ret;
>
> +       if (vma->vm_flags & MAP_SHARED &&
> +                       ovl_copy_up_shared(file_inode(file)->i_sb)) {
> +               ovl_cuw.err = 0;
> +               ovl_cuw.flags = 0;
> +               ovl_cuw.dentry = file_dentry(file);
> +               set_bit(OVL_COPY_UP_PENDING, &ovl_cuw.flags);
> +
> +               wqh = bit_waitqueue(&ovl_cuw.flags, OVL_COPY_UP_PENDING);
> +               prepare_to_wait(wqh, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
> +
> +               INIT_WORK(&ovl_cuw.work, ovl_copy_up_work_fn);
> +               schedule_work(&ovl_cuw.work);
> +
> +               schedule();
> +               finish_wait(wqh, &wait.wq_entry);

This just hides the bad lock dependency, it does not remove it.

The solution we've come up with is arguably more complex, but it does
fix this properly:  make overlay use its own address space operations
in case of a shared map.

Amir, I lost track, do you remember what's the status of this work?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] ovl: copy-up on MAP_SHARED
  2020-02-13 19:12 ` Miklos Szeredi
@ 2020-02-13 21:28   ` Amir Goldstein
  2020-02-14 13:50     ` Chengguang Xu
  0 siblings, 1 reply; 4+ messages in thread
From: Amir Goldstein @ 2020-02-13 21:28 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Chengguang Xu, overlayfs

On Thu, Feb 13, 2020 at 9:12 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Mon, Feb 10, 2020 at 4:11 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
> >  {
> >         struct file *realfile = file->private_data;
> >         const struct cred *old_cred;
> > +       struct inode *inode = file->f_inode;
> > +       struct ovl_copy_up_work ovl_cuw;
> > +       DEFINE_WAIT_BIT(wait, &ovl_cuw.flags, OVL_COPY_UP_PENDING);
> > +       wait_queue_head_t *wqh;
> >         int ret;
> >
> > +       if (vma->vm_flags & MAP_SHARED &&
> > +                       ovl_copy_up_shared(file_inode(file)->i_sb)) {
> > +               ovl_cuw.err = 0;
> > +               ovl_cuw.flags = 0;
> > +               ovl_cuw.dentry = file_dentry(file);
> > +               set_bit(OVL_COPY_UP_PENDING, &ovl_cuw.flags);
> > +
> > +               wqh = bit_waitqueue(&ovl_cuw.flags, OVL_COPY_UP_PENDING);
> > +               prepare_to_wait(wqh, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
> > +
> > +               INIT_WORK(&ovl_cuw.work, ovl_copy_up_work_fn);
> > +               schedule_work(&ovl_cuw.work);
> > +
> > +               schedule();
> > +               finish_wait(wqh, &wait.wq_entry);
>
> This just hides the bad lock dependency, it does not remove it.
>
> The solution we've come up with is arguably more complex, but it does
> fix this properly:  make overlay use its own address space operations
> in case of a shared map.
>
> Amir, I lost track, do you remember what's the status of this work?
>

I'm afraid it is still standing at the side of the road where we left it...
I haven't had any time to work on it since.

The latest WIP branch is at:
https://github.com/amir73il/linux/commits/ovl-aops-wip

And summary of what it contains is at:
https://lore.kernel.org/linux-unionfs/CAJfpegsyA4SjmtAEpkMoKsvgmW0CiEwWEAbU7v3yJztLKmC0Eg@mail.gmail.com/

Problem is, this WIP doesn't even solve the MAP_SHARED case yet,
but it is a big step in the direction of the design you laid out here:
https://lore.kernel.org/linux-unionfs/CAJfpegvJU32_9_mVh7kem0s529-8Qs02fPSr4ChCC3ZJ2pRhLw@mail.gmail.com/

Chengguang,

If you are up for the task, feel free to pick up the WIP branch
and bring it into shape for merging.
Then we can also discuss the next steps for fixing MAP_SHARED.

BTW, you did not mention why MAP_SHARED case is important
in your workload. I'm just curious how important is it to solve it.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] ovl: copy-up on MAP_SHARED
  2020-02-13 21:28   ` Amir Goldstein
@ 2020-02-14 13:50     ` Chengguang Xu
  0 siblings, 0 replies; 4+ messages in thread
From: Chengguang Xu @ 2020-02-14 13:50 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- 在 星期五, 2020-02-14 05:28:10 Amir Goldstein <amir73il@gmail.com> 撰写 ----
 > On Thu, Feb 13, 2020 at 9:12 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
 > >
 > > On Mon, Feb 10, 2020 at 4:11 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > > >  static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 > > >  {
 > > >         struct file *realfile = file->private_data;
 > > >         const struct cred *old_cred;
 > > > +       struct inode *inode = file->f_inode;
 > > > +       struct ovl_copy_up_work ovl_cuw;
 > > > +       DEFINE_WAIT_BIT(wait, &ovl_cuw.flags, OVL_COPY_UP_PENDING);
 > > > +       wait_queue_head_t *wqh;
 > > >         int ret;
 > > >
 > > > +       if (vma->vm_flags & MAP_SHARED &&
 > > > +                       ovl_copy_up_shared(file_inode(file)->i_sb)) {
 > > > +               ovl_cuw.err = 0;
 > > > +               ovl_cuw.flags = 0;
 > > > +               ovl_cuw.dentry = file_dentry(file);
 > > > +               set_bit(OVL_COPY_UP_PENDING, &ovl_cuw.flags);
 > > > +
 > > > +               wqh = bit_waitqueue(&ovl_cuw.flags, OVL_COPY_UP_PENDING);
 > > > +               prepare_to_wait(wqh, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
 > > > +
 > > > +               INIT_WORK(&ovl_cuw.work, ovl_copy_up_work_fn);
 > > > +               schedule_work(&ovl_cuw.work);
 > > > +
 > > > +               schedule();
 > > > +               finish_wait(wqh, &wait.wq_entry);
 > >
 > > This just hides the bad lock dependency, it does not remove it.
 > >
 > > The solution we've come up with is arguably more complex, but it does
 > > fix this properly:  make overlay use its own address space operations
 > > in case of a shared map.
 > >
 > > Amir, I lost track, do you remember what's the status of this work?
 > >
 > 
 > I'm afraid it is still standing at the side of the road where we left it...
 > I haven't had any time to work on it since.
 > 
 > The latest WIP branch is at:
 > https://github.com/amir73il/linux/commits/ovl-aops-wip
 > 
 > And summary of what it contains is at:
 > https://lore.kernel.org/linux-unionfs/CAJfpegsyA4SjmtAEpkMoKsvgmW0CiEwWEAbU7v3yJztLKmC0Eg@mail.gmail.com/
 > 
 > Problem is, this WIP doesn't even solve the MAP_SHARED case yet,
 > but it is a big step in the direction of the design you laid out here:
 > https://lore.kernel.org/linux-unionfs/CAJfpegvJU32_9_mVh7kem0s529-8Qs02fPSr4ChCC3ZJ2pRhLw@mail.gmail.com/
 > 
 > Chengguang,
 > 
 > If you are up for the task, feel free to pick up the WIP branch
 > and bring it into shape for merging.
 > Then we can also discuss the next steps for fixing MAP_SHARED.

Thanks for the detailed information, I'll check your branch carefully later.

 
 > 
 > BTW, you did not mention why MAP_SHARED case is important
 > in your workload. I'm just curious how important is it to solve it.

I haven't received any complaint about MAP_SHARED problem yet,  
so it seems not important as performance/space saving in our workload.
However, if we implement overlayfs' own address space, I think we can
do further improvement for copy-up based on it. (like delay/partial copy-up)


Thanks,
Chengguang








^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-14 13:50 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-10  3:10 [RFC PATCH] ovl: copy-up on MAP_SHARED Chengguang Xu
2020-02-13 19:12 ` Miklos Szeredi
2020-02-13 21:28   ` Amir Goldstein
2020-02-14 13:50     ` Chengguang Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).