* [PATCH v3] ovl: whiteout inode sharing
From: Chengguang Xu @ 2020-04-14  9:53 UTC
  To: miklos, amir73il; +Cc: linux-unionfs, Chengguang Xu

Share one inode among different whiteout files to save
inodes and speed up delete operations.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
v1->v2:
- Address Amir's comments in v1

v2->v3:
- Address Amir's comments in v2
- Rebase on Amir's "Overlayfs use index dir as work dir" patch set
- Keep at most one whiteout tmpfile in work dir

 fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
 fs/overlayfs/overlayfs.h |  9 +++++++--
 fs/overlayfs/ovl_entry.h |  4 ++++
 fs/overlayfs/readdir.c   |  3 ++-
 fs/overlayfs/super.c     |  9 +++++++++
 fs/overlayfs/util.c      |  3 ++-
 6 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 279009dee366..dbe5e54dcb16 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 }
 
 /* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 {
 	int err;
+	bool retried = false;
+	bool should_link = (ofs->whiteout_link_max > 1);
 	struct dentry *whiteout;
 	struct inode *wdir = workdir->d_inode;
 
+retry:
 	whiteout = ovl_lookup_temp(workdir);
 	if (IS_ERR(whiteout))
 		return whiteout;
 
+	if (should_link && ofs->whiteout) {
+		err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+		if (err || !ovl_whiteout_linkable(ofs)) {
+			ovl_cleanup(wdir, ofs->whiteout);
+			dput(ofs->whiteout);
+			ofs->whiteout = NULL;
+		}
+
+		if (!err)
+			return whiteout;
+	}
+
 	err = ovl_do_whiteout(wdir, whiteout);
 	if (err) {
 		dput(whiteout);
-		whiteout = ERR_PTR(err);
+		return ERR_PTR(err);
 	}
 
-	return whiteout;
+	if (!should_link || retried)
+		return whiteout;
+
+	ofs->whiteout = whiteout;
+	retried = true;
+	goto retry;
 }
 
 /* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
-			     struct dentry *dentry)
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+			     struct inode *dir, struct dentry *dentry)
 {
 	struct inode *wdir = workdir->d_inode;
 	struct dentry *whiteout;
 	int err;
 	int flags = 0;
 
-	whiteout = ovl_whiteout(workdir);
+	whiteout = ovl_whiteout(ofs, workdir);
 	err = PTR_ERR(whiteout);
 	if (IS_ERR(whiteout))
 		return err;
@@ -715,6 +735,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 static int ovl_remove_and_whiteout(struct dentry *dentry,
 				   struct list_head *list)
 {
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct dentry *upper;
@@ -748,7 +769,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 		goto out_dput_upper;
 	}
 
-	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+	err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
 	if (err)
 		goto out_d_drop;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e00b1ff6dea9..3d7e0e342dae 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -225,6 +225,11 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
 	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
+{
+	return ofs->whiteout->d_inode->i_nlink <= ofs->whiteout_link_max;
+}
+
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
@@ -455,8 +460,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
-			     struct dentry *dentry);
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+			     struct inode *dir, struct dentry *dentry);
 struct ovl_cattr {
 	dev_t rdev;
 	umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5762d802fe01..de5f230b6e6b 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -77,6 +77,10 @@ struct ovl_fs {
 	int xino_mode;
 	/* For allocation of non-persistent inode numbers */
 	atomic_long_t last_ino;
+	/* Whiteout dentry cache */
+	struct dentry *whiteout;
+	/* Whiteout max link count */
+	unsigned int whiteout_link_max;
 };
 
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index d6b601caf122..eb4683e58cff 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1147,7 +1147,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 			 * Whiteout orphan index to block future open by
 			 * handle after overlay nlink dropped to zero.
 			 */
-			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+			err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
+						       index);
 		} else {
 			/* Cleanup orphan index entries */
 			err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b91b23a0366c..b6f2393ec111 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -26,6 +26,10 @@ struct ovl_dir_cache;
 
 #define OVL_MAX_STACK 500
 
+static unsigned int ovl_whiteout_link_max_def = 60000;
+module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
+MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
+
 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(redirect_dir,
@@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	iput(ofs->upperdir_trap);
 	dput(ofs->indexdir);
 	dput(ofs->workdir);
+	dput(ofs->whiteout);
 	if (ofs->workdir_locked)
 		ovl_inuse_unlock(ofs->workbasedir);
 	dput(ofs->workbasedir);
@@ -1762,6 +1767,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 		if (!ofs->workdir)
 			sb->s_flags |= SB_RDONLY;
+		else
+			ofs->whiteout_link_max = min_not_zero(
+					ofs->workdir->d_sb->s_max_links,
+					ovl_whiteout_link_max_def ?: 1);
 
 		sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
 		sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 36b60788ee47..18df65ee81a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -669,6 +669,7 @@ bool ovl_need_index(struct dentry *dentry)
 /* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
 	struct inode *dir = indexdir->d_inode;
 	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -707,7 +708,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 		index = NULL;
 	} else if (ovl_index_all(dentry->d_sb)) {
 		/* Whiteout orphan index to block future open by handle */
-		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		err = ovl_cleanup_and_whiteout(ofs, indexdir, dir, index);
 	} else {
 		/* Cleanup orphan index entries */
 		err = ovl_cleanup(dir, index);
-- 
2.20.1




* Re: [PATCH v3] ovl: whiteout inode sharing
From: Amir Goldstein @ 2020-04-14 13:44 UTC
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Sharing inode with different whiteout files for saving
> inode and speeding up deleting operation.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> ---
> v1->v2:
> - Address Amir's comments in v1
>
> v2->v3:
> - Address Amir's comments in v2
> - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
> - Keep at most one whiteout tmpfile in work dir
>
>  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
>  fs/overlayfs/overlayfs.h |  9 +++++++--
>  fs/overlayfs/ovl_entry.h |  4 ++++
>  fs/overlayfs/readdir.c   |  3 ++-
>  fs/overlayfs/super.c     |  9 +++++++++
>  fs/overlayfs/util.c      |  3 ++-
>  6 files changed, 52 insertions(+), 11 deletions(-)
>
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 279009dee366..dbe5e54dcb16 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
>  }
>
>  /* caller holds i_mutex on workdir */
> -static struct dentry *ovl_whiteout(struct dentry *workdir)
> +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
>  {
>         int err;
> +       bool retried = false;
> +       bool should_link = (ofs->whiteout_link_max > 1);
>         struct dentry *whiteout;
>         struct inode *wdir = workdir->d_inode;
>
> +retry:
>         whiteout = ovl_lookup_temp(workdir);
>         if (IS_ERR(whiteout))
>                 return whiteout;
>
> +       if (should_link && ofs->whiteout) {

What happens with ofs->whiteout_link_max == 2 is that half of the
time, the whiteout gets linked and then unlinked.
That is not needed.
I think the code would look better like this:

          if (ovl_whiteout_linkable(ofs)) {
                  err = ovl_do_link(ofs->whiteout, wdir, whiteout);
...
          } else if (ofs->whiteout) {
                  ovl_cleanup(wdir, ofs->whiteout);
....

With:

static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
{
       return ofs->whiteout &&
                 ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
}

> +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> +               if (err || !ovl_whiteout_linkable(ofs)) {
> +                       ovl_cleanup(wdir, ofs->whiteout);
> +                       dput(ofs->whiteout);
> +                       ofs->whiteout = NULL;
> +               }
> +
> +               if (!err)
> +                       return whiteout;
> +       }
> +
...
> @@ -1762,6 +1767,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
>
>                 if (!ofs->workdir)
>                         sb->s_flags |= SB_RDONLY;
> +               else
> +                       ofs->whiteout_link_max = min_not_zero(
> +                                       ofs->workdir->d_sb->s_max_links,
> +                                       ovl_whiteout_link_max_def ?: 1);
>

Please leave that code inside ovl_get_workdir() or ovl_make_workdir().

Thanks,
Amir.


* Re: [PATCH v3] ovl: whiteout inode sharing
From: Chengguang Xu @ 2020-04-15  1:03 UTC
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
 > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > > Sharing inode with different whiteout files for saving
 > > inode and speeding up deleting operation.
 > >
 > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > > ---
 > > v1->v2:
 > > - Address Amir's comments in v1
 > >
 > > v2->v3:
 > > - Address Amir's comments in v2
 > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
 > > - Keep at most one whiteout tmpfile in work dir
 > >
 > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
 > >  fs/overlayfs/overlayfs.h |  9 +++++++--
 > >  fs/overlayfs/ovl_entry.h |  4 ++++
 > >  fs/overlayfs/readdir.c   |  3 ++-
 > >  fs/overlayfs/super.c     |  9 +++++++++
 > >  fs/overlayfs/util.c      |  3 ++-
 > >  6 files changed, 52 insertions(+), 11 deletions(-)
 > >
 > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
 > > index 279009dee366..dbe5e54dcb16 100644
 > > --- a/fs/overlayfs/dir.c
 > > +++ b/fs/overlayfs/dir.c
 > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 > >  }
 > >
 > >  /* caller holds i_mutex on workdir */
 > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
 > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 > >  {
 > >         int err;
 > > +       bool retried = false;
 > > +       bool should_link = (ofs->whiteout_link_max > 1);
 > >         struct dentry *whiteout;
 > >         struct inode *wdir = workdir->d_inode;
 > >
 > > +retry:
 > >         whiteout = ovl_lookup_temp(workdir);
 > >         if (IS_ERR(whiteout))
 > >                 return whiteout;
 > >
 > > +       if (should_link && ofs->whiteout) {
 > 
 > What happens with ofs->whiteout_link_max == 2 is that half of the
 > times, whiteout gets linked and then unlinked.
 > That is not needed.
 > I think code would look better like this:
 > 
 >           if (ovl_whiteout_linkable(ofs)) {
 >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > ...
 >           } else if (ofs->whiteout) {
 >                   ovl_cleanup(wdir, ofs->whiteout);
 > ....
 > 
 > With:
 > 
 > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
 > {
 >        return ofs->whiteout &&
 >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
 > }
 > 

The tmpfile has already occupied one link, so the maximum link count of the
whiteout inode in your code will be ofs->whiteout_link_max - 1, right?

Thanks,
cgxu




* Re: [PATCH v3] ovl: whiteout inode sharing
From: Amir Goldstein @ 2020-04-15  7:03 UTC
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >
>  > > Sharing inode with different whiteout files for saving
>  > > inode and speeding up deleting operation.
>  > >
>  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>  > > ---
>  > > v1->v2:
>  > > - Address Amir's comments in v1
>  > >
>  > > v2->v3:
>  > > - Address Amir's comments in v2
>  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
>  > > - Keep at most one whiteout tmpfile in work dir
>  > >
>  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
>  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
>  > >  fs/overlayfs/ovl_entry.h |  4 ++++
>  > >  fs/overlayfs/readdir.c   |  3 ++-
>  > >  fs/overlayfs/super.c     |  9 +++++++++
>  > >  fs/overlayfs/util.c      |  3 ++-
>  > >  6 files changed, 52 insertions(+), 11 deletions(-)
>  > >
>  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
>  > > index 279009dee366..dbe5e54dcb16 100644
>  > > --- a/fs/overlayfs/dir.c
>  > > +++ b/fs/overlayfs/dir.c
>  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
>  > >  }
>  > >
>  > >  /* caller holds i_mutex on workdir */
>  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
>  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
>  > >  {
>  > >         int err;
>  > > +       bool retried = false;
>  > > +       bool should_link = (ofs->whiteout_link_max > 1);
>  > >         struct dentry *whiteout;
>  > >         struct inode *wdir = workdir->d_inode;
>  > >
>  > > +retry:
>  > >         whiteout = ovl_lookup_temp(workdir);
>  > >         if (IS_ERR(whiteout))
>  > >                 return whiteout;
>  > >
>  > > +       if (should_link && ofs->whiteout) {
>  >
>  > What happens with ofs->whiteout_link_max == 2 is that half of the
>  > times, whiteout gets linked and then unlinked.
>  > That is not needed.
>  > I think code would look better like this:
>  >
>  >           if (ovl_whiteout_linkable(ofs)) {
>  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > ...
>  >           } else if (ofs->whiteout) {
>  >                   ovl_cleanup(wdir, ofs->whiteout);
>  > ....
>  >
>  > With:
>  >
>  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
>  > {
>  >        return ofs->whiteout &&
>  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
>  > }
>  >
>
> tmpfile has occupied one link, so the maximum link count of whiteout inode
> in your code will be ofs->whiteout_link_max - 1, right?
>

Right, but I wrote the wrong pseudo code for using this helper.
The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
and it may be linked to a new tmp whiteout.
The only reason to clean up the tmp whiteout is if the link has unexpectedly
failed, and in that case I think we should disable whiteout sharing.

Let me try again:

+       err = -EMLINK;
+       if (ovl_whiteout_linkable(ofs)) {
+               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+               if (!err)
+                       return whiteout;
+        }
+        if (err && ofs->whiteout) {
+               pr_warn("failed to link whiteout - disabling whiteout sharing (nlink=%u, err = %i)\n",
+                       ofs->whiteout->d_inode->i_nlink, err);
+               ofs->whiteout_link_max = 0;
+               should_link = false;
+               ovl_cleanup(wdir, ofs->whiteout);
+               dput(ofs->whiteout);
+               ofs->whiteout = NULL;
+       }
+
         err = ovl_do_whiteout(wdir, whiteout);

Is that better?

Thanks,
Amir.


* Re: [PATCH v3] ovl: whiteout inode sharing
From: Chengguang Xu @ 2020-04-15  8:01 UTC
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- On Wednesday, 2020-04-15 15:03:09 Amir Goldstein <amir73il@gmail.com> wrote ----
 > On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > >  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >
 > >  > > Sharing inode with different whiteout files for saving
 > >  > > inode and speeding up deleting operation.
 > >  > >
 > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > >  > > ---
 > >  > > v1->v2:
 > >  > > - Address Amir's comments in v1
 > >  > >
 > >  > > v2->v3:
 > >  > > - Address Amir's comments in v2
 > >  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
 > >  > > - Keep at most one whiteout tmpfile in work dir
 > >  > >
 > >  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
 > >  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
 > >  > >  fs/overlayfs/ovl_entry.h |  4 ++++
 > >  > >  fs/overlayfs/readdir.c   |  3 ++-
 > >  > >  fs/overlayfs/super.c     |  9 +++++++++
 > >  > >  fs/overlayfs/util.c      |  3 ++-
 > >  > >  6 files changed, 52 insertions(+), 11 deletions(-)
 > >  > >
 > >  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
 > >  > > index 279009dee366..dbe5e54dcb16 100644
 > >  > > --- a/fs/overlayfs/dir.c
 > >  > > +++ b/fs/overlayfs/dir.c
 > >  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 > >  > >  }
 > >  > >
 > >  > >  /* caller holds i_mutex on workdir */
 > >  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
 > >  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 > >  > >  {
 > >  > >         int err;
 > >  > > +       bool retried = false;
 > >  > > +       bool should_link = (ofs->whiteout_link_max > 1);
 > >  > >         struct dentry *whiteout;
 > >  > >         struct inode *wdir = workdir->d_inode;
 > >  > >
 > >  > > +retry:
 > >  > >         whiteout = ovl_lookup_temp(workdir);
 > >  > >         if (IS_ERR(whiteout))
 > >  > >                 return whiteout;
 > >  > >
 > >  > > +       if (should_link && ofs->whiteout) {
 > >  >
 > >  > What happens with ofs->whiteout_link_max == 2 is that half of the
 > >  > times, whiteout gets linked and then unlinked.
 > >  > That is not needed.
 > >  > I think code would look better like this:
 > >  >
 > >  >           if (ovl_whiteout_linkable(ofs)) {
 > >  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > ...
 > >  >           } else if (ofs->whiteout) {
 > >  >                   ovl_cleanup(wdir, ofs->whiteout);
 > >  > ....
 > >  >
 > >  > With:
 > >  >
 > >  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
 > >  > {
 > >  >        return ofs->whiteout &&
 > >  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
 > >  > }
 > >  >
 > >
 > > tmpfile has occupied one link, so the maximum link count of whiteout inode
 > > in your code will be ofs->whiteout_link_max - 1, right?
 > >
 > 
 > Right, but I wrote wrong pseudo code to use this helper.
 > The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
 > and it may be linked to a new tmp whiteout.
 > The only reason to cleanup tmp whiteout is if link has unexpectedly failed and
 > in this case I think we should disable whiteout sharing.
 > 
 > Let me try again:
 > 
 > +       err = -EMLINK;
 > +       if (ovl_whiteout_linkable(ofs)) {
 > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > +               if (!err)
 > +                       return whiteout;
 > +        }
 > +        if (err && ofs->whiteout) {
 > +               pr_warn("failed to link whiteout - disabling whiteout
 > sharing (nlink=%u, err = %i)\n",
 > +                             ofs->whiteout->d_inode->i_nlink, err);
 > +               ofs->whiteout_link_max = 0;
 > +               should_link = false;
 > +               ovl_cleanup(wdir, ofs->whiteout);
 > +               dput(ofs->whiteout);
 > +               ofs->whiteout = NULL;
 > +       }
 > +
 >          err = ovl_do_whiteout(wdir, whiteout);
 > 
 > Is that better?
 > 

I don't fully understand why we should limit sharing to only one inode for whiteout files.
Consider deleting a lot of files (not dirs) in lowerdir: we will get an -EMLINK error because
the link count of the shared inode exceeds the maximum link limit of the underlying file system.
In that case, we can choose another whiteout inode for sharing.

Thanks,
cgxu










* Re: [PATCH v3] ovl: whiteout inode sharing
From: Amir Goldstein @ 2020-04-15  8:12 UTC
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Wed, Apr 15, 2020 at 11:01 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- On Wednesday, 2020-04-15 15:03:09 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >
>  > >  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > >  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >  > >
>  > >  > > Sharing inode with different whiteout files for saving
>  > >  > > inode and speeding up deleting operation.
>  > >  > >
>  > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>  > >  > > ---
>  > >  > > v1->v2:
>  > >  > > - Address Amir's comments in v1
>  > >  > >
>  > >  > > v2->v3:
>  > >  > > - Address Amir's comments in v2
>  > >  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
>  > >  > > - Keep at most one whiteout tmpfile in work dir
>  > >  > >
>  > >  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
>  > >  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
>  > >  > >  fs/overlayfs/ovl_entry.h |  4 ++++
>  > >  > >  fs/overlayfs/readdir.c   |  3 ++-
>  > >  > >  fs/overlayfs/super.c     |  9 +++++++++
>  > >  > >  fs/overlayfs/util.c      |  3 ++-
>  > >  > >  6 files changed, 52 insertions(+), 11 deletions(-)
>  > >  > >
>  > >  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
>  > >  > > index 279009dee366..dbe5e54dcb16 100644
>  > >  > > --- a/fs/overlayfs/dir.c
>  > >  > > +++ b/fs/overlayfs/dir.c
>  > >  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
>  > >  > >  }
>  > >  > >
>  > >  > >  /* caller holds i_mutex on workdir */
>  > >  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
>  > >  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
>  > >  > >  {
>  > >  > >         int err;
>  > >  > > +       bool retried = false;
>  > >  > > +       bool should_link = (ofs->whiteout_link_max > 1);
>  > >  > >         struct dentry *whiteout;
>  > >  > >         struct inode *wdir = workdir->d_inode;
>  > >  > >
>  > >  > > +retry:
>  > >  > >         whiteout = ovl_lookup_temp(workdir);
>  > >  > >         if (IS_ERR(whiteout))
>  > >  > >                 return whiteout;
>  > >  > >
>  > >  > > +       if (should_link && ofs->whiteout) {
>  > >  >
>  > >  > What happens with ofs->whiteout_link_max == 2 is that half of the
>  > >  > times, whiteout gets linked and then unlinked.
>  > >  > That is not needed.
>  > >  > I think code would look better like this:
>  > >  >
>  > >  >           if (ovl_whiteout_linkable(ofs)) {
>  > >  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > >  > ...
>  > >  >           } else if (ofs->whiteout) {
>  > >  >                   ovl_cleanup(wdir, ofs->whiteout);
>  > >  > ....
>  > >  >
>  > >  > With:
>  > >  >
>  > >  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
>  > >  > {
>  > >  >        return ofs->whiteout &&
>  > >  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
>  > >  > }
>  > >  >
>  > >
>  > > tmpfile has occupied one link, so the maximum link count of whiteout inode
>  > > in your code will be ofs->whiteout_link_max - 1, right?
>  > >
>  >
>  > Right, but I wrote wrong pseudo code to use this helper.
>  > The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
>  > and it may be linked to a new tmp whiteout.
>  > The only reason to cleanup tmp whiteout is if link has unexpectedly failed and
>  > in this case I think we should disable whiteout sharing.
>  >
>  > Let me try again:
>  >
>  > +       err = -EMLINK;
>  > +       if (ovl_whiteout_linkable(ofs)) {
>  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > +               if (!err)
>  > +                       return whiteout;
>  > +        }
>  > +        if (err && ofs->whiteout) {
>  > +               pr_warn("failed to link whiteout - disabling whiteout
>  > sharing (nlink=%u, err = %i)\n",
>  > +                             ofs->whiteout->d_inode->i_nlink, err);
>  > +               ofs->whiteout_link_max = 0;
>  > +               should_link = false;
>  > +               ovl_cleanup(wdir, ofs->whiteout);
>  > +               dput(ofs->whiteout);
>  > +               ofs->whiteout = NULL;
>  > +       }
>  > +
>  >          err = ovl_do_whiteout(wdir, whiteout);
>  >
>  > Is that better?
>  >
>
> I don't fully understand why should we limit to share only one inode for whiteout files.
> Consider deleting a lot of files(not dir) in lowerdir, we will get -EMLINK error because
> the link count of shared inode exceeds the maximum link limit in underlying file system.
> In this case, we can choose another whiteout inode for sharing.
>

I got it wrong again. I forgot the maxed out case. Take #3:

+       err = 0;
+       if (ovl_whiteout_linkable(ofs)) {
+               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+               if (!err)
+                       return whiteout;
+        } else if (ofs->whiteout) {
+               dput(whiteout);
+               whiteout = ofs->whiteout;
+               ofs->whiteout = NULL;
+               return whiteout;
+        }
+        if (err) {
+               pr_warn("failed to link whiteout - disabling whiteout sharing (nlink=%u, err = %i)\n",
+                       ofs->whiteout->d_inode->i_nlink, err);
+               ofs->whiteout_link_max = 0;
+               should_link = false;
+               ovl_cleanup(wdir, ofs->whiteout);
+       }
+
         err = ovl_do_whiteout(wdir, whiteout);


I hope I got the logic right this time.
Feel free to organize it differently.
Disabling should be reserved for the case where failure is not expected,
so that we avoid having every whiteout creation try and fail;
for example, if the s_max_links reported by the filesystem is incorrect.
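
For completeness, a self-contained sketch of ovl_whiteout() with this take
folded back into the v3 retry structure could read as below. It assumes the
ovl_whiteout_linkable() helper suggested earlier (NULL check plus the '<'
comparison), and it additionally drops the reference to the cached dentry on
the error path, which the snippet above leaves implicit:

static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
{
	int err;
	bool retried = false;
	bool should_link = (ofs->whiteout_link_max > 1);
	struct dentry *whiteout;
	struct inode *wdir = workdir->d_inode;

retry:
	whiteout = ovl_lookup_temp(workdir);
	if (IS_ERR(whiteout))
		return whiteout;

	if (should_link) {
		if (ovl_whiteout_linkable(ofs)) {
			/* Share the cached whiteout inode via a hard link */
			err = ovl_do_link(ofs->whiteout, wdir, whiteout);
			if (!err)
				return whiteout;

			/* Unexpected link failure: stop sharing altogether */
			pr_warn("failed to link whiteout - disabling whiteout sharing (nlink=%u, err=%i)\n",
				ofs->whiteout->d_inode->i_nlink, err);
			ofs->whiteout_link_max = 0;
			should_link = false;
			ovl_cleanup(wdir, ofs->whiteout);
			dput(ofs->whiteout);
			ofs->whiteout = NULL;
		} else if (ofs->whiteout) {
			/* Cached whiteout is maxed out: hand it out as is */
			dput(whiteout);
			whiteout = ofs->whiteout;
			ofs->whiteout = NULL;
			return whiteout;
		}
	}

	err = ovl_do_whiteout(wdir, whiteout);
	if (err) {
		dput(whiteout);
		return ERR_PTR(err);
	}

	if (!should_link || retried)
		return whiteout;

	/* Cache the fresh whiteout and create one more name to hand out */
	ofs->whiteout = whiteout;
	retried = true;
	goto retry;
}

Since whiteout_link_max is forced to 0 after a failure, should_link stays
false for the rest of this mount, so only the first failure is ever logged.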

Thanks,
Amir.


* Re: [PATCH v3] ovl: whiteout inode sharing
From: Chengguang Xu @ 2020-04-15  8:30 UTC
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- On Wednesday, 2020-04-15 16:12:04 Amir Goldstein <amir73il@gmail.com> wrote ----
 > On Wed, Apr 15, 2020 at 11:01 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > >  ---- On Wednesday, 2020-04-15 15:03:09 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >
 > >  > >  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > >  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >  > >
 > >  > >  > > Sharing inode with different whiteout files for saving
 > >  > >  > > inode and speeding up deleting operation.
 > >  > >  > >
 > >  > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > >  > >  > > ---
 > >  > >  > > v1->v2:
 > >  > >  > > - Address Amir's comments in v1
 > >  > >  > >
 > >  > >  > > v2->v3:
 > >  > >  > > - Address Amir's comments in v2
 > >  > >  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
 > >  > >  > > - Keep at most one whiteout tmpfile in work dir
 > >  > >  > >
 > >  > >  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
 > >  > >  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
 > >  > >  > >  fs/overlayfs/ovl_entry.h |  4 ++++
 > >  > >  > >  fs/overlayfs/readdir.c   |  3 ++-
 > >  > >  > >  fs/overlayfs/super.c     |  9 +++++++++
 > >  > >  > >  fs/overlayfs/util.c      |  3 ++-
 > >  > >  > >  6 files changed, 52 insertions(+), 11 deletions(-)
 > >  > >  > >
 > >  > >  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
 > >  > >  > > index 279009dee366..dbe5e54dcb16 100644
 > >  > >  > > --- a/fs/overlayfs/dir.c
 > >  > >  > > +++ b/fs/overlayfs/dir.c
 > >  > >  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 > >  > >  > >  }
 > >  > >  > >
 > >  > >  > >  /* caller holds i_mutex on workdir */
 > >  > >  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
 > >  > >  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 > >  > >  > >  {
 > >  > >  > >         int err;
 > >  > >  > > +       bool retried = false;
 > >  > >  > > +       bool should_link = (ofs->whiteout_link_max > 1);
 > >  > >  > >         struct dentry *whiteout;
 > >  > >  > >         struct inode *wdir = workdir->d_inode;
 > >  > >  > >
 > >  > >  > > +retry:
 > >  > >  > >         whiteout = ovl_lookup_temp(workdir);
 > >  > >  > >         if (IS_ERR(whiteout))
 > >  > >  > >                 return whiteout;
 > >  > >  > >
 > >  > >  > > +       if (should_link && ofs->whiteout) {
 > >  > >  >
 > >  > >  > What happens with ofs->whiteout_link_max == 2 is that half of the
 > >  > >  > times, whiteout gets linked and then unlinked.
 > >  > >  > That is not needed.
 > >  > >  > I think code would look better like this:
 > >  > >  >
 > >  > >  >           if (ovl_whiteout_linkable(ofs)) {
 > >  > >  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > >  > ...
 > >  > >  >           } else if (ofs->whiteout) {
 > >  > >  >                   ovl_cleanup(wdir, ofs->whiteout);
 > >  > >  > ....
 > >  > >  >
 > >  > >  > With:
 > >  > >  >
 > >  > >  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
 > >  > >  > {
 > >  > >  >        return ofs->whiteout &&
 > >  > >  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
 > >  > >  > }
 > >  > >  >
 > >  > >
 > >  > > tmpfile has occupied one link, so the maximum link count of whiteout inode
 > >  > > in your code will be ofs->whiteout_link_max - 1, right?
 > >  > >
 > >  >
 > >  > Right, but I wrote wrong pseudo code to use this helper.
 > >  > The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
 > >  > and it may be linked to a new tmp whiteout.
 > >  > The only reason to cleanup tmp whiteout is if link has unexpectedly failed and
 > >  > in this case I think we should disable whiteout sharing.
 > >  >
 > >  > Let me try again:
 > >  >
 > >  > +       err = -EMLINK;
 > >  > +       if (ovl_whiteout_linkable(ofs)) {
 > >  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > +               if (!err)
 > >  > +                       return whiteout;
 > >  > +        }
 > >  > +        if (err && ofs->whiteout) {
 > >  > +               pr_warn("failed to link whiteout - disabling whiteout
 > >  > sharing (nlink=%u, err = %i)\n",
 > >  > +                             ofs->whiteout->d_inode->i_nlink, err);
 > >  > +               ofs->whiteout_link_max = 0;
 > >  > +               should_link = false;
 > >  > +               ovl_cleanup(wdir, ofs->whiteout);
 > >  > +               dput(ofs->whiteout);
 > >  > +               ofs->whiteout = NULL;
 > >  > +       }
 > >  > +
 > >  >          err = ovl_do_whiteout(wdir, whiteout);
 > >  >
 > >  > Is that better?
 > >  >
 > >
 > > I don't fully understand why should we limit to share only one inode for whiteout files.
 > > Consider deleting a lot of files(not dir) in lowerdir, we will get -EMLINK error because
 > > the link count of shared inode exceeds the maximum link limit in underlying file system.
 > > In this case, we can choose another whiteout inode for sharing.
 > >
 > 
 > I got it wrong again. I forgot the maxed out case. Take #3:
 > 
 > +       err = 0;
 > +       if (ovl_whiteout_linkable(ofs)) {
 > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > +               if (!err)
 > +                       return whiteout;
 > +        } else if (ofs->whiteout) {
 > +               dput(whiteout);
 > +               whiteout = ofs->whiteout;
 > +               ofs->whiteout = NULL;
 > +               return whiteout;
 > +        }
 > +        if (err) {
 > +               pr_warn("failed to link whiteout - disabling whiteout
 > sharing (nlink=%u, err = %i)\n",
 > +                             ofs->whiteout->d_inode->i_nlink, err);
 > +               ofs->whiteout_link_max = 0;
 > +               should_link = false;
 > +               ovl_cleanup(wdir, ofs->whiteout);
 > +       }
 > +
 >          err = ovl_do_whiteout(wdir, whiteout);
 > 
 > 
 > I hope I got the logic right this time.
 > Feel free to organize differently.
 > Disabling should be in the case where failure is not expected
 > so we want to avoid having every whiteout creation try and fail.
 > For example if filesystem reported s_max_links is incorrect.
 > 
 
In case of unexpected errors, we could set an error limit (for example, 10);
if the link error count exceeds the limit, then we disable the feature.

Thanks,
cgxu 




* Re: [PATCH v3] ovl: whiteout inode sharing
From: Amir Goldstein @ 2020-04-15 11:26 UTC
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Wed, Apr 15, 2020 at 11:30 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- On Wednesday, 2020-04-15 16:12:04 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > On Wed, Apr 15, 2020 at 11:01 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >
>  > >  ---- On Wednesday, 2020-04-15 15:03:09 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > >  > On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >  > >
>  > >  > >  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
>  > >  > >  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>  > >  > >  > >
>  > >  > >  > > Sharing inode with different whiteout files for saving
>  > >  > >  > > inode and speeding up deleting operation.
>  > >  > >  > >
>  > >  > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>  > >  > >  > > ---
>  > >  > >  > > v1->v2:
>  > >  > >  > > - Address Amir's comments in v1
>  > >  > >  > >
>  > >  > >  > > v2->v3:
>  > >  > >  > > - Address Amir's comments in v2
>  > >  > >  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
>  > >  > >  > > - Keep at most one whiteout tmpfile in work dir
>  > >  > >  > >
>  > >  > >  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
>  > >  > >  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
>  > >  > >  > >  fs/overlayfs/ovl_entry.h |  4 ++++
>  > >  > >  > >  fs/overlayfs/readdir.c   |  3 ++-
>  > >  > >  > >  fs/overlayfs/super.c     |  9 +++++++++
>  > >  > >  > >  fs/overlayfs/util.c      |  3 ++-
>  > >  > >  > >  6 files changed, 52 insertions(+), 11 deletions(-)
>  > >  > >  > >
>  > >  > >  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
>  > >  > >  > > index 279009dee366..dbe5e54dcb16 100644
>  > >  > >  > > --- a/fs/overlayfs/dir.c
>  > >  > >  > > +++ b/fs/overlayfs/dir.c
>  > >  > >  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
>  > >  > >  > >  }
>  > >  > >  > >
>  > >  > >  > >  /* caller holds i_mutex on workdir */
>  > >  > >  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
>  > >  > >  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
>  > >  > >  > >  {
>  > >  > >  > >         int err;
>  > >  > >  > > +       bool retried = false;
>  > >  > >  > > +       bool should_link = (ofs->whiteout_link_max > 1);
>  > >  > >  > >         struct dentry *whiteout;
>  > >  > >  > >         struct inode *wdir = workdir->d_inode;
>  > >  > >  > >
>  > >  > >  > > +retry:
>  > >  > >  > >         whiteout = ovl_lookup_temp(workdir);
>  > >  > >  > >         if (IS_ERR(whiteout))
>  > >  > >  > >                 return whiteout;
>  > >  > >  > >
>  > >  > >  > > +       if (should_link && ofs->whiteout) {
>  > >  > >  >
>  > >  > >  > What happens with ofs->whiteout_link_max == 2 is that half of the
>  > >  > >  > times, whiteout gets linked and then unlinked.
>  > >  > >  > That is not needed.
>  > >  > >  > I think code would look better like this:
>  > >  > >  >
>  > >  > >  >           if (ovl_whiteout_linkable(ofs)) {
>  > >  > >  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > >  > >  > ...
>  > >  > >  >           } else if (ofs->whiteout) {
>  > >  > >  >                   ovl_cleanup(wdir, ofs->whiteout);
>  > >  > >  > ....
>  > >  > >  >
>  > >  > >  > With:
>  > >  > >  >
>  > >  > >  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
>  > >  > >  > {
>  > >  > >  >        return ofs->whiteout &&
>  > >  > >  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
>  > >  > >  > }
>  > >  > >  >
>  > >  > >
>  > >  > > tmpfile has occupied one link, so the maximum link count of whiteout inode
>  > >  > > in your code will be ofs->whiteout_link_max - 1, right?
>  > >  > >
>  > >  >
>  > >  > Right, but I wrote wrong pseudo code to use this helper.
>  > >  > The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
>  > >  > and it may be linked to a new tmp whiteout.
>  > >  > The only reason to cleanup tmp whiteout is if link has unexpectedly failed and
>  > >  > in this case I think we should disable whiteout sharing.
>  > >  >
>  > >  > Let me try again:
>  > >  >
>  > >  > +       err = -EMLINK;
>  > >  > +       if (ovl_whiteout_linkable(ofs)) {
>  > >  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > >  > +               if (!err)
>  > >  > +                       return whiteout;
>  > >  > +        }
>  > >  > +        if (err && ofs->whiteout) {
>  > >  > +               pr_warn("failed to link whiteout - disabling whiteout
>  > >  > sharing (nlink=%u, err = %i)\n",
>  > >  > +                             ofs->whiteout->d_inode->i_nlink, err);
>  > >  > +               ofs->whiteout_link_max = 0;
>  > >  > +               should_link = false;
>  > >  > +               ovl_cleanup(wdir, ofs->whiteout);
>  > >  > +               dput(ofs->whiteout);
>  > >  > +               ofs->whiteout = NULL;
>  > >  > +       }
>  > >  > +
>  > >  >          err = ovl_do_whiteout(wdir, whiteout);
>  > >  >
>  > >  > Is that better?
>  > >  >
>  > >
>  > > I don't fully understand why should we limit to share only one inode for whiteout files.
>  > > Consider deleting a lot of files(not dir) in lowerdir, we will get -EMLINK error because
>  > > the link count of shared inode exceeds the maximum link limit in underlying file system.
>  > > In this case, we can choose another whiteout inode for sharing.
>  > >
>  >
>  > I got it wrong again. I forgot the maxed out case. Take #3:
>  >
>  > +       err = 0;
>  > +       if (ovl_whiteout_linkable(ofs)) {
>  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
>  > +               if (!err)
>  > +                       return whiteout;
>  > +        } else if (ofs->whiteout) {
>  > +               dput(whiteout);
>  > +               whiteout = ofs->whiteout;
>  > +               ofs->whiteout = NULL;
>  > +               return whiteout;
>  > +        }
>  > +        if (err) {
>  > +               pr_warn("failed to link whiteout - disabling whiteout
>  > sharing (nlink=%u, err = %i)\n",
>  > +                             ofs->whiteout->d_inode->i_nlink, err);
>  > +               ofs->whiteout_link_max = 0;
>  > +               should_link = false;
>  > +               ovl_cleanup(wdir, ofs->whiteout);
>  > +       }
>  > +
>  >          err = ovl_do_whiteout(wdir, whiteout);
>  >
>  >
>  > I hope I got the logic right this time.
>  > Feel free to organize differently.
>  > Disabling should be in the case where failure is not expected
>  > so we want to avoid having every whiteout creation try and fail.
>  > For example if filesystem reported s_max_links is incorrect.
>  >
>
> In case of any unexpected errors, we could set a error limit(for example, 10),
> if link error count exceeds the limit then we disable the feature.
>

As far as I am concerned, you may post the patch without auto disable,
but I object to using an arbitrary number of errors (10) as the trigger for
auto disable.

I do think that it is important to communicate to the admin that whiteout
sharing is not working as expected, and it is not acceptable to flood the
log with a warning on each and every whiteout creation, not even with
pr_warn_ratelimited.

There are several ways you can go about this, but here is a suggestion:

+        if (err) {
+               ofs->whiteout_link_max >>= 1;
+               pr_warn("failed to link whiteout (nlink=%u, err = %i) - lowering whiteout_link_max to %u\n",
+                       ofs->whiteout->d_inode->i_nlink, err,
+                       ofs->whiteout_link_max);
+               should_link = false;
+               ovl_cleanup(wdir, ofs->whiteout);
+       }

Thanks,
Amir.


* Re: [PATCH v3] ovl: whiteout inode sharing
From: Chengguang Xu @ 2020-04-16  6:30 UTC
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- On Wednesday, 2020-04-15 19:26:40 Amir Goldstein <amir73il@gmail.com> wrote ----
 > On Wed, Apr 15, 2020 at 11:30 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > >  ---- On Wednesday, 2020-04-15 16:12:04 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > On Wed, Apr 15, 2020 at 11:01 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >
 > >  > >  ---- On Wednesday, 2020-04-15 15:03:09 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > >  > On Wed, Apr 15, 2020 at 4:03 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >  > >
 > >  > >  > >  ---- On Tuesday, 2020-04-14 21:44:39 Amir Goldstein <amir73il@gmail.com> wrote ----
 > >  > >  > >  > On Tue, Apr 14, 2020 at 12:53 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >  > >  > >  > >
 > >  > >  > >  > > Sharing inode with different whiteout files for saving
 > >  > >  > >  > > inode and speeding up deleting operation.
 > >  > >  > >  > >
 > >  > >  > >  > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > >  > >  > >  > > ---
 > >  > >  > >  > > v1->v2:
 > >  > >  > >  > > - Address Amir's comments in v1
 > >  > >  > >  > >
 > >  > >  > >  > > v2->v3:
 > >  > >  > >  > > - Address Amir's comments in v2
 > >  > >  > >  > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set
 > >  > >  > >  > > - Keep at most one whiteout tmpfile in work dir
 > >  > >  > >  > >
 > >  > >  > >  > >  fs/overlayfs/dir.c       | 35 ++++++++++++++++++++++++++++-------
 > >  > >  > >  > >  fs/overlayfs/overlayfs.h |  9 +++++++--
 > >  > >  > >  > >  fs/overlayfs/ovl_entry.h |  4 ++++
 > >  > >  > >  > >  fs/overlayfs/readdir.c   |  3 ++-
 > >  > >  > >  > >  fs/overlayfs/super.c     |  9 +++++++++
 > >  > >  > >  > >  fs/overlayfs/util.c      |  3 ++-
 > >  > >  > >  > >  6 files changed, 52 insertions(+), 11 deletions(-)
 > >  > >  > >  > >
 > >  > >  > >  > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
 > >  > >  > >  > > index 279009dee366..dbe5e54dcb16 100644
 > >  > >  > >  > > --- a/fs/overlayfs/dir.c
 > >  > >  > >  > > +++ b/fs/overlayfs/dir.c
 > >  > >  > >  > > @@ -62,35 +62,55 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 > >  > >  > >  > >  }
 > >  > >  > >  > >
 > >  > >  > >  > >  /* caller holds i_mutex on workdir */
 > >  > >  > >  > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
 > >  > >  > >  > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 > >  > >  > >  > >  {
 > >  > >  > >  > >         int err;
 > >  > >  > >  > > +       bool retried = false;
 > >  > >  > >  > > +       bool should_link = (ofs->whiteout_link_max > 1);
 > >  > >  > >  > >         struct dentry *whiteout;
 > >  > >  > >  > >         struct inode *wdir = workdir->d_inode;
 > >  > >  > >  > >
 > >  > >  > >  > > +retry:
 > >  > >  > >  > >         whiteout = ovl_lookup_temp(workdir);
 > >  > >  > >  > >         if (IS_ERR(whiteout))
 > >  > >  > >  > >                 return whiteout;
 > >  > >  > >  > >
 > >  > >  > >  > > +       if (should_link && ofs->whiteout) {
 > >  > >  > >  >
 > >  > >  > >  > What happens with ofs->whiteout_link_max == 2 is that half of the
 > >  > >  > >  > times, whiteout gets linked and then unlinked.
 > >  > >  > >  > That is not needed.
 > >  > >  > >  > I think code would look better like this:
 > >  > >  > >  >
 > >  > >  > >  >           if (ovl_whiteout_linkable(ofs)) {
 > >  > >  > >  >                   err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > >  > >  > ...
 > >  > >  > >  >           } else if (ofs->whiteout) {
 > >  > >  > >  >                   ovl_cleanup(wdir, ofs->whiteout);
 > >  > >  > >  > ....
 > >  > >  > >  >
 > >  > >  > >  > With:
 > >  > >  > >  >
 > >  > >  > >  > static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
 > >  > >  > >  > {
 > >  > >  > >  >        return ofs->whiteout &&
 > >  > >  > >  >                  ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max;
 > >  > >  > >  > }
 > >  > >  > >  >
 > >  > >  > >
 > >  > >  > > tmpfile has occupied one link, so the maximum link count of whiteout inode
 > >  > >  > > in your code will be ofs->whiteout_link_max - 1, right?
 > >  > >  > >
 > >  > >  >
 > >  > >  > Right, but I wrote wrong pseudo code to use this helper.
 > >  > >  > The intention is that ovl_whiteout_linkable(ofs) means there is a tmp whiteout
 > >  > >  > and it may be linked to a new tmp whiteout.
 > >  > >  > The only reason to cleanup tmp whiteout is if link has unexpectedly failed and
 > >  > >  > in this case I think we should disable whiteout sharing.
 > >  > >  >
 > >  > >  > Let me try again:
 > >  > >  >
 > >  > >  > +       err = -EMLINK;
 > >  > >  > +       if (ovl_whiteout_linkable(ofs)) {
 > >  > >  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > >  > +               if (!err)
 > >  > >  > +                       return whiteout;
 > >  > >  > +        }
 > >  > >  > +        if (err && ofs->whiteout) {
 > >  > >  > +               pr_warn("failed to link whiteout - disabling whiteout
 > >  > >  > sharing (nlink=%u, err = %i)\n",
 > >  > >  > +                             ofs->whiteout->d_inode->i_nlink, err);
 > >  > >  > +               ofs->whiteout_link_max = 0;
 > >  > >  > +               should_link = false;
 > >  > >  > +               ovl_cleanup(wdir, ofs->whiteout);
 > >  > >  > +               dput(ofs->whiteout);
 > >  > >  > +               ofs->whiteout = NULL;
 > >  > >  > +       }
 > >  > >  > +
 > >  > >  >          err = ovl_do_whiteout(wdir, whiteout);
 > >  > >  >
 > >  > >  > Is that better?
 > >  > >  >
 > >  > >
 > >  > > I don't fully understand why should we limit to share only one inode for whiteout files.
 > >  > > Consider deleting a lot of files(not dir) in lowerdir, we will get -EMLINK error because
 > >  > > the link count of shared inode exceeds the maximum link limit in underlying file system.
 > >  > > In this case, we can choose another whiteout inode for sharing.
 > >  > >
 > >  >
 > >  > I got it wrong again. I forgot the maxed out case. Take #3:
 > >  >
 > >  > +       err = 0;
 > >  > +       if (ovl_whiteout_linkable(ofs)) {
 > >  > +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > >  > +               if (!err)
 > >  > +                       return whiteout;
 > >  > +        } else if (ofs->whiteout) {
 > >  > +               dput(whiteout);
 > >  > +               whiteout = ofs->whiteout;
 > >  > +               ofs->whiteout = NULL;
 > >  > +               return whiteout;
 > >  > +        }
 > >  > +        if (err) {
 > >  > +               pr_warn("failed to link whiteout - disabling whiteout
 > >  > sharing (nlink=%u, err = %i)\n",
 > >  > +                             ofs->whiteout->d_inode->i_nlink, err);
 > >  > +               ofs->whiteout_link_max = 0;
 > >  > +               should_link = false;
 > >  > +               ovl_cleanup(wdir, ofs->whiteout);
 > >  > +       }
 > >  > +
 > >  >          err = ovl_do_whiteout(wdir, whiteout);
 > >  >
 > >  >
 > >  > I hope I got the logic right this time.
 > >  > Feel free to organize differently.
 > >  > Disabling should be in the case where failure is not expected
 > >  > so we want to avoid having every whiteout creation try and fail.
 > >  > For example if filesystem reported s_max_links is incorrect.
 > >  >
 > >
 > > In case of any unexpected errors, we could set a error limit(for example, 10),
 > > if link error count exceeds the limit then we disable the feature.
 > >
 > 
 > As far as I am concerned, you may post the patch without auto disable,
 > but I object to using an arbitrary number of errors (10) as trigger for auto
 > disable.
 > 
 > I do think that it is important to communicate to admin that the whiteout
 > sharing is not working as expected and it is not acceptable to flood
 > log with warning on each and every whiteout creation, not even with
 > pr_warn_ratelimited.
 > 
 > There are several ways you can go about this, but here is a suggestion:
 > 
 > +        if (err) {
 > +               ofs->whiteout_link_max >>= 1;
 > +               pr_warn("failed to link whiteout (nlink=%u, err = %i)
 > - lowering whiteout_link_max to %u\n",
 > +                             ofs->whiteout->d_inode->i_nlink, err,
 > ofs->whiteout_link_max);
 > +               should_link = false;
 > +               ovl_cleanup(wdir, ofs->whiteout);
 > +       }
 > 

Frankly speaking, I don't like heuristic methods that automatically enable/disable a feature,
so I prefer to disable the feature immediately after hitting any error, just like your previous suggestion,
and I also hope to add a mount option for this feature to mitigate the global effect across different overlayfs instances.
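
As an illustration of what such a per-mount knob could look like (the option
name and the exact hook points below are hypothetical, not part of this
patch), the usual overlayfs plumbing would roughly be:

	/* ovl_entry.h: hypothetical field in struct ovl_config */
	bool whiteout_share;

	/* super.c: hypothetical entries in the ovl_tokens match table */
	{OPT_WHITEOUT_SHARE_ON,		"whiteout_share=on"},
	{OPT_WHITEOUT_SHARE_OFF,	"whiteout_share=off"},

	/* super.c: corresponding cases in ovl_parse_opt() */
	case OPT_WHITEOUT_SHARE_ON:
		config->whiteout_share = true;
		break;
	case OPT_WHITEOUT_SHARE_OFF:
		config->whiteout_share = false;
		break;

	/* and only compute a non-zero link limit when the instance opted in */
	if (ofs->config.whiteout_share)
		ofs->whiteout_link_max = min_not_zero(
					ofs->workdir->d_sb->s_max_links,
					ovl_whiteout_link_max_def ?: 1);

The module parameter would then only act as the default upper bound for
instances that enable sharing.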

Thanks,
cgxu






* Re: [PATCH v3] ovl: whiteout inode sharing
From: Amir Goldstein @ 2020-04-16  6:47 UTC
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

>  > > In case of any unexpected errors, we could set a error limit(for example, 10),
>  > > if link error count exceeds the limit then we disable the feature.
>  > >
>  >
>  > As far as I am concerned, you may post the patch without auto disable,
>  > but I object to using an arbitrary number of errors (10) as trigger for auto
>  > disable.
>  >
>  > I do think that it is important to communicate to admin that the whiteout
>  > sharing is not working as expected and it is not acceptable to flood
>  > log with warning on each and every whiteout creation, not even with
>  > pr_warn_ratelimited.
>  >
>  > There are several ways you can go about this, but here is a suggestion:
>  >
>  > +        if (err) {
>  > +               ofs->whiteout_link_max >>= 1;
>  > +               pr_warn("failed to link whiteout (nlink=%u, err = %i)
>  > - lowering whiteout_link_max to %u\n",
>  > +                             ofs->whiteout->d_inode->i_nlink, err,
>  > ofs->whiteout_link_max);
>  > +               should_link = false;
>  > +               ovl_cleanup(wdir, ofs->whiteout);
>  > +       }
>  >
>
> Frankly speaking, I don't like heuristic method to automatically enable/disable feature,
> so I perfer to disable the feature immediately after hitting any error just like your previous suggestion,

Fine by me.

> and I also hope to add a mount option for this feature to mitigate global effect to different overlayfs instances.
>

I think that would make sense.

Off topic: from a system perspective, the fact that whiteouts take up inodes
to begin with is a shame. If one had control over the underlying filesystem,
a whiteout should have been implemented as a constant and immutable special
inode with no nlink count at all, and getdents would return DT_WHITEOUT for
those entries.

Thanks,
Amir.


* Re: [PATCH v3] ovl: whiteout inode sharing
From: Miklos Szeredi @ 2020-04-16  7:10 UTC
  To: Amir Goldstein; +Cc: Chengguang Xu, overlayfs

On Thu, Apr 16, 2020 at 8:47 AM Amir Goldstein <amir73il@gmail.com> wrote:
>
> >  > > In case of any unexpected errors, we could set a error limit(for example, 10),
> >  > > if link error count exceeds the limit then we disable the feature.
> >  > >
> >  >
> >  > As far as I am concerned, you may post the patch without auto disable,
> >  > but I object to using an arbitrary number of errors (10) as trigger for auto
> >  > disable.
> >  >
> >  > I do think that it is important to communicate to admin that the whiteout
> >  > sharing is not working as expected and it is not acceptable to flood
> >  > log with warning on each and every whiteout creation, not even with
> >  > pr_warn_ratelimited.
> >  >
> >  > There are several ways you can go about this, but here is a suggestion:
> >  >
> >  > +        if (err) {
> >  > +               ofs->whiteout_link_max >>= 1;
> >  > +               pr_warn("failed to link whiteout (nlink=%u, err = %i)
> >  > - lowering whiteout_link_max to %u\n",
> >  > +                             ofs->whiteout->d_inode->i_nlink, err,
> >  > ofs->whiteout_link_max);
> >  > +               should_link = false;
> >  > +               ovl_cleanup(wdir, ofs->whiteout);
> >  > +       }
> >  >
> >
> > Frankly speaking, I don't like heuristic method to automatically enable/disable feature,
> > so I perfer to disable the feature immediately after hitting any error just like your previous suggestion,
>
> Fine by me.
>
> > and I also hope to add a mount option for this feature to mitigate global effect to different overlayfs instances.
> >
>
> I think that would make sense.
>
> Off topic: from system perspective, the fact that whiteouts take up inodes
> to begin with is a shame. If one had control over the underlying filesystem,
> whiteout should have been implemented as a constant and immutable special
> inode with no nlink count at all and getdents would return DT_WHITEOUT for
> those entries.

Yes, and filesystems are free to implement such a beast.  The
operations are given:  mknod(S_IFCHR, 0) and RENAME_WHITEOUT.  Adding
DT_WHITEOUT handling to overlayfs would be trivial, I guess.
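
For context, the representation both remarks refer to: today a whiteout on
the upper/work layer is an ordinary 0:0 character device inode created via
vfs_whiteout(), and overlayfs has to look entries up to classify them. A
simplified sketch of the existing helpers (a filesystem reporting
DT_WHITEOUT from getdents would let the readdir merge skip that lookup):

/* include/linux/fs.h: a whiteout is an S_IFCHR inode with rdev 0 */
#define WHITEOUT_MODE	0
#define WHITEOUT_DEV	0
#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
				 (inode)->i_rdev == WHITEOUT_DEV)

/* overlayfs side: detect a whiteout entry on the upper layer */
static bool ovl_is_whiteout(struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	return inode && IS_WHITEOUT(inode);
}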

Thanks,
Miklos

