All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4] ovl: whiteout inode sharing
@ 2020-04-22 10:27 Chengguang Xu
  2020-04-22 11:50 ` Amir Goldstein
  0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-22 10:27 UTC (permalink / raw)
  To: miklos, amir73il; +Cc: linux-unionfs, Chengguang Xu

Sharing inode with different whiteout files for saving
inode and speeding up deleting operation.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
v1->v2:
- Address Amir's comments in v1.

v2->v3:
- Address Amir's comments in v2.
- Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
- Keep at most one whiteout tmpfile in work dir.

v3->v4:
- Disable the feature after link failure.
- Add mount option(whiteout link max) for overlayfs instance.

 fs/overlayfs/dir.c       | 47 ++++++++++++++++++++++++++++++++++------
 fs/overlayfs/overlayfs.h | 10 +++++++--
 fs/overlayfs/ovl_entry.h |  5 +++++
 fs/overlayfs/readdir.c   |  3 ++-
 fs/overlayfs/super.c     | 24 ++++++++++++++++++++
 fs/overlayfs/util.c      |  3 ++-
 6 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 279009dee366..8b7d8854f31f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 }
 
 /* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 {
 	int err;
+	bool retried = false;
+	bool should_link = (ofs->whiteout_link_max > 1);
 	struct dentry *whiteout;
 	struct inode *wdir = workdir->d_inode;
 
+retry:
 	whiteout = ovl_lookup_temp(workdir);
 	if (IS_ERR(whiteout))
 		return whiteout;
 
+	err = 0;
+	if (should_link) {
+		if (ovl_whiteout_linkable(ofs)) {
+			err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+			if (!err)
+				return whiteout;
+		} else if (ofs->whiteout) {
+			dput(whiteout);
+			whiteout = ofs->whiteout;
+			ofs->whiteout = NULL;
+			return whiteout;
+		}
+
+		if (err) {
+			pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+				ofs->whiteout->d_inode->i_nlink, err);
+			ofs->whiteout_link_max = 0;
+			should_link = false;
+			ovl_cleanup(wdir, ofs->whiteout);
+			dput(ofs->whiteout);
+			ofs->whiteout = NULL;
+		}
+	}
+
 	err = ovl_do_whiteout(wdir, whiteout);
 	if (err) {
 		dput(whiteout);
-		whiteout = ERR_PTR(err);
+		return ERR_PTR(err);
 	}
 
-	return whiteout;
+	if (!should_link || retried)
+		return whiteout;
+
+	ofs->whiteout = whiteout;
+	retried = true;
+	goto retry;
 }
 
 /* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
-			     struct dentry *dentry)
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+			     struct inode *dir, struct dentry *dentry)
 {
 	struct inode *wdir = workdir->d_inode;
 	struct dentry *whiteout;
 	int err;
 	int flags = 0;
 
-	whiteout = ovl_whiteout(workdir);
+	whiteout = ovl_whiteout(ofs, workdir);
 	err = PTR_ERR(whiteout);
 	if (IS_ERR(whiteout))
 		return err;
@@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 static int ovl_remove_and_whiteout(struct dentry *dentry,
 				   struct list_head *list)
 {
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct dentry *upper;
@@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 		goto out_dput_upper;
 	}
 
-	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+	err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
 	if (err)
 		goto out_d_drop;
 
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e00b1ff6dea9..3b127c997a6d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
 	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
+{
+	return (ofs->whiteout &&
+		ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
+}
+
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
@@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
-			     struct dentry *dentry);
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+			     struct inode *dir, struct dentry *dentry);
 struct ovl_cattr {
 	dev_t rdev;
 	umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5762d802fe01..c805c35e0594 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@ struct ovl_config {
 	bool nfs_export;
 	int xino;
 	bool metacopy;
+	unsigned int whiteout_link_max;
 };
 
 struct ovl_sb {
@@ -77,6 +78,10 @@ struct ovl_fs {
 	int xino_mode;
 	/* For allocation of non-persistent inode numbers */
 	atomic_long_t last_ino;
+	/* Whiteout dentry cache */
+	struct dentry *whiteout;
+	/* Whiteout max link count */
+	unsigned int whiteout_link_max;
 };
 
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 20f5310d3ee4..bf22fb7792c1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 			 * Whiteout orphan index to block future open by
 			 * handle after overlay nlink dropped to zero.
 			 */
-			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+			err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
+						       index);
 		} else {
 			/* Cleanup orphan index entries */
 			err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f57aa348dcd6..6bccab4d5596 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -26,6 +26,10 @@ struct ovl_dir_cache;
 
 #define OVL_MAX_STACK 500
 
+static unsigned int ovl_whiteout_link_max_def = 60000;
+module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
+MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
+
 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 MODULE_PARM_DESC(redirect_dir,
@@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	iput(ofs->upperdir_trap);
 	dput(ofs->indexdir);
 	dput(ofs->workdir);
+	dput(ofs->whiteout);
 	if (ofs->workdir_locked)
 		ovl_inuse_unlock(ofs->workbasedir);
 	dput(ofs->workbasedir);
@@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ofs->config.metacopy != ovl_metacopy_def)
 		seq_printf(m, ",metacopy=%s",
 			   ofs->config.metacopy ? "on" : "off");
+	if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
+		seq_printf(m, ",whiteout_link_max=%u",
+			   ofs->config.whiteout_link_max);
+
 	return 0;
 }
 
@@ -398,6 +407,7 @@ enum {
 	OPT_XINO_AUTO,
 	OPT_METACOPY_ON,
 	OPT_METACOPY_OFF,
+	OPT_WHITEOUT_LINK_MAX,
 	OPT_ERR,
 };
 
@@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
 	{OPT_XINO_AUTO,			"xino=auto"},
 	{OPT_METACOPY_ON,		"metacopy=on"},
 	{OPT_METACOPY_OFF,		"metacopy=off"},
+	{OPT_WHITEOUT_LINK_MAX,		"whiteout_link_max=%u"},
 	{OPT_ERR,			NULL}
 };
 
@@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
 	char *p;
 	int err;
+	int link_max;
 	bool metacopy_opt = false, redirect_opt = false;
 	bool nfs_export_opt = false, index_opt = false;
 
@@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			metacopy_opt = true;
 			break;
 
+		case OPT_WHITEOUT_LINK_MAX:
+			if (match_int(&args[0], &link_max))
+				return -EINVAL;
+			if (link_max < ovl_whiteout_link_max_def)
+				config->whiteout_link_max = link_max;
+			break;
+
 		default:
 			pr_err("unrecognized mount option \"%s\" or missing value\n",
 					p);
@@ -1269,6 +1288,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
 		ofs->config.nfs_export = false;
 	}
+
+	ofs->whiteout_link_max = min_not_zero(
+		ofs->workdir->d_sb->s_max_links,
+		ofs->config.whiteout_link_max ?: 1);
 out:
 	mnt_drop_write(mnt);
 	return err;
@@ -1768,6 +1791,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	ofs->config.nfs_export = ovl_nfs_export_def;
 	ofs->config.xino = ovl_xino_def();
 	ofs->config.metacopy = ovl_metacopy_def;
+	ofs->config.whiteout_link_max = ovl_whiteout_link_max_def;
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
 		goto out_err;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 36b60788ee47..18df65ee81a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -669,6 +669,7 @@ bool ovl_need_index(struct dentry *dentry)
 /* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
 	struct inode *dir = indexdir->d_inode;
 	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -707,7 +708,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 		index = NULL;
 	} else if (ovl_index_all(dentry->d_sb)) {
 		/* Whiteout orphan index to block future open by handle */
-		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		err = ovl_cleanup_and_whiteout(ofs, indexdir, dir, index);
 	} else {
 		/* Cleanup orphan index entries */
 		err = ovl_cleanup(dir, index);
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-22 10:27 [PATCH v4] ovl: whiteout inode sharing Chengguang Xu
@ 2020-04-22 11:50 ` Amir Goldstein
  2020-04-23  1:17   ` Chengguang Xu
  0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-22 11:50 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Wed, Apr 22, 2020 at 1:28 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Sharing inode with different whiteout files for saving
> inode and speeding up deleting operation.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>

Reviewed-by: Amir Goldstein <amir73il@gmail.com>

Just one question...

> ---
> v1->v2:
> - Address Amir's comments in v1.
>
> v2->v3:
> - Address Amir's comments in v2.
> - Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
> - Keep at most one whiteout tmpfile in work dir.
>
> v3->v4:
> - Disable the feature after link failure.
> - Add mount option(whiteout link max) for overlayfs instance.
>
>  fs/overlayfs/dir.c       | 47 ++++++++++++++++++++++++++++++++++------
>  fs/overlayfs/overlayfs.h | 10 +++++++--
>  fs/overlayfs/ovl_entry.h |  5 +++++
>  fs/overlayfs/readdir.c   |  3 ++-
>  fs/overlayfs/super.c     | 24 ++++++++++++++++++++
>  fs/overlayfs/util.c      |  3 ++-
>  6 files changed, 81 insertions(+), 11 deletions(-)
>
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 279009dee366..8b7d8854f31f 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
>  }
>
>  /* caller holds i_mutex on workdir */
> -static struct dentry *ovl_whiteout(struct dentry *workdir)
> +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
>  {
>         int err;
> +       bool retried = false;
> +       bool should_link = (ofs->whiteout_link_max > 1);
>         struct dentry *whiteout;
>         struct inode *wdir = workdir->d_inode;
>
> +retry:
>         whiteout = ovl_lookup_temp(workdir);
>         if (IS_ERR(whiteout))
>                 return whiteout;
>
> +       err = 0;
> +       if (should_link) {
> +               if (ovl_whiteout_linkable(ofs)) {
> +                       err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> +                       if (!err)
> +                               return whiteout;
> +               } else if (ofs->whiteout) {
> +                       dput(whiteout);
> +                       whiteout = ofs->whiteout;
> +                       ofs->whiteout = NULL;
> +                       return whiteout;
> +               }
> +
> +               if (err) {
> +                       pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> +                               ofs->whiteout->d_inode->i_nlink, err);
> +                       ofs->whiteout_link_max = 0;
> +                       should_link = false;
> +                       ovl_cleanup(wdir, ofs->whiteout);
> +                       dput(ofs->whiteout);
> +                       ofs->whiteout = NULL;
> +               }
> +       }
> +
>         err = ovl_do_whiteout(wdir, whiteout);
>         if (err) {
>                 dput(whiteout);
> -               whiteout = ERR_PTR(err);
> +               return ERR_PTR(err);
>         }
>
> -       return whiteout;
> +       if (!should_link || retried)
> +               return whiteout;
> +
> +       ofs->whiteout = whiteout;
> +       retried = true;
> +       goto retry;
>  }
>
>  /* Caller must hold i_mutex on both workdir and dir */
> -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> -                            struct dentry *dentry)
> +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> +                            struct inode *dir, struct dentry *dentry)
>  {
>         struct inode *wdir = workdir->d_inode;
>         struct dentry *whiteout;
>         int err;
>         int flags = 0;
>
> -       whiteout = ovl_whiteout(workdir);
> +       whiteout = ovl_whiteout(ofs, workdir);
>         err = PTR_ERR(whiteout);
>         if (IS_ERR(whiteout))
>                 return err;
> @@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
>  static int ovl_remove_and_whiteout(struct dentry *dentry,
>                                    struct list_head *list)
>  {
> +       struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
>         struct dentry *workdir = ovl_workdir(dentry);
>         struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
>         struct dentry *upper;
> @@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
>                 goto out_dput_upper;
>         }
>
> -       err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
> +       err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
>         if (err)
>                 goto out_d_drop;
>
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index e00b1ff6dea9..3b127c997a6d 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
>         return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
>  }
>
> +static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
> +{
> +       return (ofs->whiteout &&
> +               ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
> +}
> +
>  /* util.c */
>  int ovl_want_write(struct dentry *dentry);
>  void ovl_drop_write(struct dentry *dentry);
> @@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
>
>  /* dir.c */
>  extern const struct inode_operations ovl_dir_inode_operations;
> -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> -                            struct dentry *dentry);
> +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> +                            struct inode *dir, struct dentry *dentry);
>  struct ovl_cattr {
>         dev_t rdev;
>         umode_t mode;
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 5762d802fe01..c805c35e0594 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -17,6 +17,7 @@ struct ovl_config {
>         bool nfs_export;
>         int xino;
>         bool metacopy;
> +       unsigned int whiteout_link_max;
>  };
>
>  struct ovl_sb {
> @@ -77,6 +78,10 @@ struct ovl_fs {
>         int xino_mode;
>         /* For allocation of non-persistent inode numbers */
>         atomic_long_t last_ino;
> +       /* Whiteout dentry cache */
> +       struct dentry *whiteout;
> +       /* Whiteout max link count */
> +       unsigned int whiteout_link_max;
>  };
>
>  static inline struct ovl_fs *OVL_FS(struct super_block *sb)
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 20f5310d3ee4..bf22fb7792c1 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
>                          * Whiteout orphan index to block future open by
>                          * handle after overlay nlink dropped to zero.
>                          */
> -                       err = ovl_cleanup_and_whiteout(indexdir, dir, index);
> +                       err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
> +                                                      index);
>                 } else {
>                         /* Cleanup orphan index entries */
>                         err = ovl_cleanup(dir, index);
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index f57aa348dcd6..6bccab4d5596 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -26,6 +26,10 @@ struct ovl_dir_cache;
>
>  #define OVL_MAX_STACK 500
>
> +static unsigned int ovl_whiteout_link_max_def = 60000;
> +module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
> +MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
> +
>  static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
>  module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
>  MODULE_PARM_DESC(redirect_dir,
> @@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
>         iput(ofs->upperdir_trap);
>         dput(ofs->indexdir);
>         dput(ofs->workdir);
> +       dput(ofs->whiteout);
>         if (ofs->workdir_locked)
>                 ovl_inuse_unlock(ofs->workbasedir);
>         dput(ofs->workbasedir);
> @@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
>         if (ofs->config.metacopy != ovl_metacopy_def)
>                 seq_printf(m, ",metacopy=%s",
>                            ofs->config.metacopy ? "on" : "off");
> +       if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
> +               seq_printf(m, ",whiteout_link_max=%u",
> +                          ofs->config.whiteout_link_max);
> +
>         return 0;
>  }
>
> @@ -398,6 +407,7 @@ enum {
>         OPT_XINO_AUTO,
>         OPT_METACOPY_ON,
>         OPT_METACOPY_OFF,
> +       OPT_WHITEOUT_LINK_MAX,
>         OPT_ERR,
>  };
>
> @@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
>         {OPT_XINO_AUTO,                 "xino=auto"},
>         {OPT_METACOPY_ON,               "metacopy=on"},
>         {OPT_METACOPY_OFF,              "metacopy=off"},
> +       {OPT_WHITEOUT_LINK_MAX,         "whiteout_link_max=%u"},
>         {OPT_ERR,                       NULL}
>  };
>
> @@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>  {
>         char *p;
>         int err;
> +       int link_max;
>         bool metacopy_opt = false, redirect_opt = false;
>         bool nfs_export_opt = false, index_opt = false;
>
> @@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
>                         metacopy_opt = true;
>                         break;
>
> +               case OPT_WHITEOUT_LINK_MAX:
> +                       if (match_int(&args[0], &link_max))
> +                               return -EINVAL;
> +                       if (link_max < ovl_whiteout_link_max_def)
> +                               config->whiteout_link_max = link_max;

Why not allow link_max > ovl_whiteout_link_max_def?
admin may want to disable ovl_whiteout_link_max_def by default
in module parameter, but allow it for specific overlay instances.

In any case, if we do have a good reason to ignore user configurable
value we should pr_warn about it and explain to users what they
need to do to overcome the miss configuration issue.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-22 11:50 ` Amir Goldstein
@ 2020-04-23  1:17   ` Chengguang Xu
  2020-04-24  6:02     ` Amir Goldstein
  0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-23  1:17 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- 在 星期三, 2020-04-22 19:50:30 Amir Goldstein <amir73il@gmail.com> 撰写 ----
 > On Wed, Apr 22, 2020 at 1:28 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
 > >
 > > Sharing inode with different whiteout files for saving
 > > inode and speeding up deleting operation.
 > >
 > > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
 > 
 > Reviewed-by: Amir Goldstein <amir73il@gmail.com>
 > 
 > Just one question...
 > 
 > > ---
 > > v1->v2:
 > > - Address Amir's comments in v1.
 > >
 > > v2->v3:
 > > - Address Amir's comments in v2.
 > > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
 > > - Keep at most one whiteout tmpfile in work dir.
 > >
 > > v3->v4:
 > > - Disable the feature after link failure.
 > > - Add mount option(whiteout link max) for overlayfs instance.
 > >
 > >  fs/overlayfs/dir.c       | 47 ++++++++++++++++++++++++++++++++++------
 > >  fs/overlayfs/overlayfs.h | 10 +++++++--
 > >  fs/overlayfs/ovl_entry.h |  5 +++++
 > >  fs/overlayfs/readdir.c   |  3 ++-
 > >  fs/overlayfs/super.c     | 24 ++++++++++++++++++++
 > >  fs/overlayfs/util.c      |  3 ++-
 > >  6 files changed, 81 insertions(+), 11 deletions(-)
 > >
 > > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
 > > index 279009dee366..8b7d8854f31f 100644
 > > --- a/fs/overlayfs/dir.c
 > > +++ b/fs/overlayfs/dir.c
 > > @@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
 > >  }
 > >
 > >  /* caller holds i_mutex on workdir */
 > > -static struct dentry *ovl_whiteout(struct dentry *workdir)
 > > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
 > >  {
 > >         int err;
 > > +       bool retried = false;
 > > +       bool should_link = (ofs->whiteout_link_max > 1);
 > >         struct dentry *whiteout;
 > >         struct inode *wdir = workdir->d_inode;
 > >
 > > +retry:
 > >         whiteout = ovl_lookup_temp(workdir);
 > >         if (IS_ERR(whiteout))
 > >                 return whiteout;
 > >
 > > +       err = 0;
 > > +       if (should_link) {
 > > +               if (ovl_whiteout_linkable(ofs)) {
 > > +                       err = ovl_do_link(ofs->whiteout, wdir, whiteout);
 > > +                       if (!err)
 > > +                               return whiteout;
 > > +               } else if (ofs->whiteout) {
 > > +                       dput(whiteout);
 > > +                       whiteout = ofs->whiteout;
 > > +                       ofs->whiteout = NULL;
 > > +                       return whiteout;
 > > +               }
 > > +
 > > +               if (err) {
 > > +                       pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
 > > +                               ofs->whiteout->d_inode->i_nlink, err);
 > > +                       ofs->whiteout_link_max = 0;
 > > +                       should_link = false;
 > > +                       ovl_cleanup(wdir, ofs->whiteout);
 > > +                       dput(ofs->whiteout);
 > > +                       ofs->whiteout = NULL;
 > > +               }
 > > +       }
 > > +
 > >         err = ovl_do_whiteout(wdir, whiteout);
 > >         if (err) {
 > >                 dput(whiteout);
 > > -               whiteout = ERR_PTR(err);
 > > +               return ERR_PTR(err);
 > >         }
 > >
 > > -       return whiteout;
 > > +       if (!should_link || retried)
 > > +               return whiteout;
 > > +
 > > +       ofs->whiteout = whiteout;
 > > +       retried = true;
 > > +       goto retry;
 > >  }
 > >
 > >  /* Caller must hold i_mutex on both workdir and dir */
 > > -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
 > > -                            struct dentry *dentry)
 > > +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
 > > +                            struct inode *dir, struct dentry *dentry)
 > >  {
 > >         struct inode *wdir = workdir->d_inode;
 > >         struct dentry *whiteout;
 > >         int err;
 > >         int flags = 0;
 > >
 > > -       whiteout = ovl_whiteout(workdir);
 > > +       whiteout = ovl_whiteout(ofs, workdir);
 > >         err = PTR_ERR(whiteout);
 > >         if (IS_ERR(whiteout))
 > >                 return err;
 > > @@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
 > >  static int ovl_remove_and_whiteout(struct dentry *dentry,
 > >                                    struct list_head *list)
 > >  {
 > > +       struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
 > >         struct dentry *workdir = ovl_workdir(dentry);
 > >         struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 > >         struct dentry *upper;
 > > @@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
 > >                 goto out_dput_upper;
 > >         }
 > >
 > > -       err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
 > > +       err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
 > >         if (err)
 > >                 goto out_d_drop;
 > >
 > > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
 > > index e00b1ff6dea9..3b127c997a6d 100644
 > > --- a/fs/overlayfs/overlayfs.h
 > > +++ b/fs/overlayfs/overlayfs.h
 > > @@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
 > >         return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 > >  }
 > >
 > > +static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
 > > +{
 > > +       return (ofs->whiteout &&
 > > +               ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
 > > +}
 > > +
 > >  /* util.c */
 > >  int ovl_want_write(struct dentry *dentry);
 > >  void ovl_drop_write(struct dentry *dentry);
 > > @@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
 > >
 > >  /* dir.c */
 > >  extern const struct inode_operations ovl_dir_inode_operations;
 > > -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
 > > -                            struct dentry *dentry);
 > > +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
 > > +                            struct inode *dir, struct dentry *dentry);
 > >  struct ovl_cattr {
 > >         dev_t rdev;
 > >         umode_t mode;
 > > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
 > > index 5762d802fe01..c805c35e0594 100644
 > > --- a/fs/overlayfs/ovl_entry.h
 > > +++ b/fs/overlayfs/ovl_entry.h
 > > @@ -17,6 +17,7 @@ struct ovl_config {
 > >         bool nfs_export;
 > >         int xino;
 > >         bool metacopy;
 > > +       unsigned int whiteout_link_max;
 > >  };
 > >
 > >  struct ovl_sb {
 > > @@ -77,6 +78,10 @@ struct ovl_fs {
 > >         int xino_mode;
 > >         /* For allocation of non-persistent inode numbers */
 > >         atomic_long_t last_ino;
 > > +       /* Whiteout dentry cache */
 > > +       struct dentry *whiteout;
 > > +       /* Whiteout max link count */
 > > +       unsigned int whiteout_link_max;
 > >  };
 > >
 > >  static inline struct ovl_fs *OVL_FS(struct super_block *sb)
 > > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
 > > index 20f5310d3ee4..bf22fb7792c1 100644
 > > --- a/fs/overlayfs/readdir.c
 > > +++ b/fs/overlayfs/readdir.c
 > > @@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
 > >                          * Whiteout orphan index to block future open by
 > >                          * handle after overlay nlink dropped to zero.
 > >                          */
 > > -                       err = ovl_cleanup_and_whiteout(indexdir, dir, index);
 > > +                       err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
 > > +                                                      index);
 > >                 } else {
 > >                         /* Cleanup orphan index entries */
 > >                         err = ovl_cleanup(dir, index);
 > > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
 > > index f57aa348dcd6..6bccab4d5596 100644
 > > --- a/fs/overlayfs/super.c
 > > +++ b/fs/overlayfs/super.c
 > > @@ -26,6 +26,10 @@ struct ovl_dir_cache;
 > >
 > >  #define OVL_MAX_STACK 500
 > >
 > > +static unsigned int ovl_whiteout_link_max_def = 60000;
 > > +module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
 > > +MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
 > > +
 > >  static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
 > >  module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
 > >  MODULE_PARM_DESC(redirect_dir,
 > > @@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 > >         iput(ofs->upperdir_trap);
 > >         dput(ofs->indexdir);
 > >         dput(ofs->workdir);
 > > +       dput(ofs->whiteout);
 > >         if (ofs->workdir_locked)
 > >                 ovl_inuse_unlock(ofs->workbasedir);
 > >         dput(ofs->workbasedir);
 > > @@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 > >         if (ofs->config.metacopy != ovl_metacopy_def)
 > >                 seq_printf(m, ",metacopy=%s",
 > >                            ofs->config.metacopy ? "on" : "off");
 > > +       if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
 > > +               seq_printf(m, ",whiteout_link_max=%u",
 > > +                          ofs->config.whiteout_link_max);
 > > +
 > >         return 0;
 > >  }
 > >
 > > @@ -398,6 +407,7 @@ enum {
 > >         OPT_XINO_AUTO,
 > >         OPT_METACOPY_ON,
 > >         OPT_METACOPY_OFF,
 > > +       OPT_WHITEOUT_LINK_MAX,
 > >         OPT_ERR,
 > >  };
 > >
 > > @@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
 > >         {OPT_XINO_AUTO,                 "xino=auto"},
 > >         {OPT_METACOPY_ON,               "metacopy=on"},
 > >         {OPT_METACOPY_OFF,              "metacopy=off"},
 > > +       {OPT_WHITEOUT_LINK_MAX,         "whiteout_link_max=%u"},
 > >         {OPT_ERR,                       NULL}
 > >  };
 > >
 > > @@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 > >  {
 > >         char *p;
 > >         int err;
 > > +       int link_max;
 > >         bool metacopy_opt = false, redirect_opt = false;
 > >         bool nfs_export_opt = false, index_opt = false;
 > >
 > > @@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 > >                         metacopy_opt = true;
 > >                         break;
 > >
 > > +               case OPT_WHITEOUT_LINK_MAX:
 > > +                       if (match_int(&args[0], &link_max))
 > > +                               return -EINVAL;
 > > +                       if (link_max < ovl_whiteout_link_max_def)
 > > +                               config->whiteout_link_max = link_max;
 > 
 > Why not allow link_max > ovl_whiteout_link_max_def?
 > admin may want to disable ovl_whiteout_link_max_def by default
 > in module parameter, but allow it for specific overlay instances.
 > 

In this use case, seems we don't need module param any more, we just need to set  default value for option.

I would like to treate module param as a total switch, so that it could disable the feathre for all instances at the same time.
I think sometimes it's helpful for lazy admin(like me).


 > In any case, if we do have a good reason to ignore user configurable
 > value we should pr_warn about it and explain to users what they
 > need to do to overcome the miss configuration issue.
 > 

Agree, let me add a warn for this case.

Thanks,
cgxu

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-23  1:17   ` Chengguang Xu
@ 2020-04-24  6:02     ` Amir Goldstein
  2020-04-24  6:26       ` Chengguang Xu
  0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-24  6:02 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

>  > > +               case OPT_WHITEOUT_LINK_MAX:
>  > > +                       if (match_int(&args[0], &link_max))
>  > > +                               return -EINVAL;
>  > > +                       if (link_max < ovl_whiteout_link_max_def)
>  > > +                               config->whiteout_link_max = link_max;
>  >
>  > Why not allow link_max > ovl_whiteout_link_max_def?
>  > admin may want to disable ovl_whiteout_link_max_def by default
>  > in module parameter, but allow it for specific overlay instances.
>  >
>
> In this use case, seems we don't need module param any more, we just need to set  default value for option.
>
> I would like to treate module param as a total switch, so that it could disable the feathre for all instances at the same time.
> I think sometimes it's helpful for lazy admin(like me).
>

I am not convinced.

lazy admin could very well want to disable whiteout_link_max by default,
but allow user to specify whiteout_link_max for a specific mount.

In fact, in order to preserve existing behavior and not cause regression with
some special filesystems, distros could decide that default disabled is
a reasonable choice.

I don't understand at all what the purpose of this limitation is.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-24  6:02     ` Amir Goldstein
@ 2020-04-24  6:26       ` Chengguang Xu
  2020-04-24 14:49         ` Amir Goldstein
  0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-24  6:26 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs

 ---- 在 星期五, 2020-04-24 14:02:00 Amir Goldstein <amir73il@gmail.com> 撰写 ----
 > >  > > +               case OPT_WHITEOUT_LINK_MAX:
 > >  > > +                       if (match_int(&args[0], &link_max))
 > >  > > +                               return -EINVAL;
 > >  > > +                       if (link_max < ovl_whiteout_link_max_def)
 > >  > > +                               config->whiteout_link_max = link_max;
 > >  >
 > >  > Why not allow link_max > ovl_whiteout_link_max_def?
 > >  > admin may want to disable ovl_whiteout_link_max_def by default
 > >  > in module parameter, but allow it for specific overlay instances.
 > >  >
 > >
 > > In this use case, seems we don't need module param any more, we just need to set  default value for option.
 > >
 > > I would like to treate module param as a total switch, so that it could disable the feathre for all instances at the same time.
 > > I think sometimes it's helpful for lazy admin(like me).
 > >
 > 
 > I am not convinced.
 > 
 > lazy admin could very well want to disable whiteout_link_max by default,
 > but allow user to specify whiteout_link_max for a specific mount.
 > 
 > In fact, in order to preserve existing behavior and not cause regression with
 > some special filesystems, distros could decide that default disabled is
 > a reasonable choice.
 > 
 > I don't understand at all what the purpose of this limitation is.
 > 

If user sets a ridiculous  link_max which is larger than valid range of upper fs, I think it is hard to verify in the stage of option parsing.
So I hope to fix the upper limit using module parameter, we can set default mount option to  0/1 for the use case you mentioned above.


Thanks,
cgxu



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-24  6:26       ` Chengguang Xu
@ 2020-04-24 14:49         ` Amir Goldstein
  2020-04-28 12:21           ` Miklos Szeredi
  0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-24 14:49 UTC (permalink / raw)
  To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs

On Fri, Apr 24, 2020 at 9:26 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
>  ---- 在 星期五, 2020-04-24 14:02:00 Amir Goldstein <amir73il@gmail.com> 撰写 ----
>  > >  > > +               case OPT_WHITEOUT_LINK_MAX:
>  > >  > > +                       if (match_int(&args[0], &link_max))
>  > >  > > +                               return -EINVAL;
>  > >  > > +                       if (link_max < ovl_whiteout_link_max_def)
>  > >  > > +                               config->whiteout_link_max = link_max;
>  > >  >
>  > >  > Why not allow link_max > ovl_whiteout_link_max_def?
>  > >  > admin may want to disable ovl_whiteout_link_max_def by default
>  > >  > in module parameter, but allow it for specific overlay instances.
>  > >  >
>  > >
>  > > In this use case, seems we don't need module param any more, we just need to set  default value for option.
>  > >
>  > > I would like to treate module param as a total switch, so that it could disable the feathre for all instances at the same time.
>  > > I think sometimes it's helpful for lazy admin(like me).
>  > >
>  >
>  > I am not convinced.
>  >
>  > lazy admin could very well want to disable whiteout_link_max by default,
>  > but allow user to specify whiteout_link_max for a specific mount.
>  >
>  > In fact, in order to preserve existing behavior and not cause regression with
>  > some special filesystems, distros could decide that default disabled is
>  > a reasonable choice.
>  >
>  > I don't understand at all what the purpose of this limitation is.
>  >
>
> If user sets a ridiculous  link_max which is larger than valid range of upper fs, I think it is hard to verify in the stage of option parsing.
> So I hope to fix the upper limit using module parameter, we can set default mount option to  0/1 for the use case you mentioned above.
>

I didn't mean we need to check if link_max  is valid range for upper fs.
We anyway use minimum of user requested value and upper fs max.

Frankly, I think there are only two sane options for system wide configuration:
1. disable whiteout link
2. enable whiteout link with ofs->workdir->d_sb->s_max_links

So perhaps the module param should be a boolean ovl_whiteout_link_def?
Perhaps Kconfig should determine the build time default.

Setting whiteout_link smaller than d_sb->s_max_links should be
possible via mount option.

We may want to support the mount options:
whiteout_link_max=<N>
whiteout_link_max=auto

It should be simple to parse whiteout_link_max=auto, just
set config->whiteout_link_max to max uint and let later code
reduce it to upper fs max.

For ovl_show_options() is slightly more complicated to get right.

I am not hooked on any of the ideas above, but I find the current
configuration options in v4/v5 not good enough.

As an exercise you can try to document those options and
see how clear the text is.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-24 14:49         ` Amir Goldstein
@ 2020-04-28 12:21           ` Miklos Szeredi
  2020-04-28 13:15             ` Amir Goldstein
  0 siblings, 1 reply; 10+ messages in thread
From: Miklos Szeredi @ 2020-04-28 12:21 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Chengguang Xu, overlayfs

On Fri, Apr 24, 2020 at 05:49:00PM +0300, Amir Goldstein wrote:

> I didn't mean we need to check if link_max  is valid range for upper fs.
> We anyway use minimum of user requested value and upper fs max.
> 
> Frankly, I think there are only two sane options for system wide configuration:
> 1. disable whiteout link
> 2. enable whiteout link with ofs->workdir->d_sb->s_max_links

And that one doesn't work, see for example ext4, which defines EXT4_LINK_MAX but
doesn't set s_max_links.  This could be fixed, but using EMLINK to detect the
max-link condition is simpler and more reliable.

And I don't really see a reason to disable whiteout hard links.  What scenario
would that be useful in?

Updated patch below.  Changes from v5:

 - fix a missing dput on shutdown
 - don't poass workdir to ovl_cleanup_and_whiteout/ovl_whiteout
 - flatten out retry loop in ovl_whiteout
 - use EMLINK to distinguish max-links from misc failure

Thanks,
Miklos

---
From: Chengguang Xu <cgxu519@mykernel.net>
Subject: ovl: whiteout inode sharing
Date: Fri, 24 Apr 2020 10:55:17 +0800

Share inode with different whiteout files for saving inode and speeding up
delete operation.

If EMLINK is encountered when linking a shared whiteout, create a new one.
In case of any other error, disable sharing for this super block.

Note: ofs->whiteout is protected by inode lock on workdir.

Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       |   49 +++++++++++++++++++++++++++++++++++------------
 fs/overlayfs/overlayfs.h |    2 -
 fs/overlayfs/ovl_entry.h |    3 ++
 fs/overlayfs/readdir.c   |    2 -
 fs/overlayfs/super.c     |    4 +++
 fs/overlayfs/util.c      |    3 +-
 6 files changed, 48 insertions(+), 15 deletions(-)

--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,59 @@ struct dentry *ovl_lookup_temp(struct de
 }
 
 /* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
 {
 	int err;
 	struct dentry *whiteout;
+	struct dentry *workdir = ofs->workdir;
 	struct inode *wdir = workdir->d_inode;
 
-	whiteout = ovl_lookup_temp(workdir);
-	if (IS_ERR(whiteout))
-		return whiteout;
+	if (!ofs->whiteout) {
+		whiteout = ovl_lookup_temp(workdir);
+		if (IS_ERR(whiteout))
+			return whiteout;
+
+		err = ovl_do_whiteout(wdir, whiteout);
+		if (err) {
+			dput(whiteout);
+			return ERR_PTR(err);
+		}
+		ofs->whiteout = whiteout;
+	}
 
-	err = ovl_do_whiteout(wdir, whiteout);
-	if (err) {
+	if (ofs->share_whiteout) {
+		whiteout = ovl_lookup_temp(workdir);
+		if (IS_ERR(whiteout))
+			goto fallback;
+
+		err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+		if (!err)
+			goto success;
+
+		if (err != -EMLINK) {
+			pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+				ofs->whiteout->d_inode->i_nlink, err);
+			ofs->share_whiteout = false;
+		}
 		dput(whiteout);
-		whiteout = ERR_PTR(err);
 	}
-
+fallback:
+	whiteout = ofs->whiteout;
+	ofs->whiteout = NULL;
+success:
 	return whiteout;
 }
 
 /* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
 			     struct dentry *dentry)
 {
-	struct inode *wdir = workdir->d_inode;
+	struct inode *wdir = ofs->workdir->d_inode;
 	struct dentry *whiteout;
 	int err;
 	int flags = 0;
 
-	whiteout = ovl_whiteout(workdir);
+	whiteout = ovl_whiteout(ofs);
 	err = PTR_ERR(whiteout);
 	if (IS_ERR(whiteout))
 		return err;
@@ -715,6 +739,7 @@ static bool ovl_matches_upper(struct den
 static int ovl_remove_and_whiteout(struct dentry *dentry,
 				   struct list_head *list)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
 	struct dentry *upper;
@@ -748,7 +773,7 @@ static int ovl_remove_and_whiteout(struc
 		goto out_dput_upper;
 	}
 
-	err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+	err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
 	if (err)
 		goto out_d_drop;
 
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -455,7 +455,7 @@ static inline void ovl_copyflags(struct
 
 /* dir.c */
 extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
 			     struct dentry *dentry);
 struct ovl_cattr {
 	dev_t rdev;
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -68,6 +68,7 @@ struct ovl_fs {
 	/* Did we take the inuse lock? */
 	bool upperdir_locked;
 	bool workdir_locked;
+	bool share_whiteout;
 	/* Traps in ovl inode cache */
 	struct inode *upperdir_trap;
 	struct inode *workbasedir_trap;
@@ -77,6 +78,8 @@ struct ovl_fs {
 	int xino_mode;
 	/* For allocation of non-persistent inode numbers */
 	atomic_long_t last_ino;
+	/* Whiteout dentry cache */
+	struct dentry *whiteout;
 };
 
 static inline struct ovl_fs *OVL_FS(struct super_block *sb)
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1154,7 +1154,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *
 			 * Whiteout orphan index to block future open by
 			 * handle after overlay nlink dropped to zero.
 			 */
-			err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+			err = ovl_cleanup_and_whiteout(ofs, dir, index);
 		} else {
 			/* Cleanup orphan index entries */
 			err = ovl_cleanup(dir, index);
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -707,7 +707,8 @@ static void ovl_cleanup_index(struct den
 		index = NULL;
 	} else if (ovl_index_all(dentry->d_sb)) {
 		/* Whiteout orphan index to block future open by handle */
-		err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+		err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+					       dir, index);
 	} else {
 		/* Cleanup orphan index entries */
 		err = ovl_cleanup(dir, index);
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -217,6 +217,7 @@ static void ovl_free_fs(struct ovl_fs *o
 	iput(ofs->indexdir_trap);
 	iput(ofs->workdir_trap);
 	iput(ofs->upperdir_trap);
+	dput(ofs->whiteout);
 	dput(ofs->indexdir);
 	dput(ofs->workdir);
 	if (ofs->workdir_locked)
@@ -1776,6 +1777,9 @@ static int ovl_fill_super(struct super_b
 	if (!cred)
 		goto out_err;
 
+	/* Is there a reason anyone would want not to share whiteouts? */
+	ofs->share_whiteout = true;
+
 	ofs->config.index = ovl_index_def;
 	ofs->config.nfs_export = ovl_nfs_export_def;
 	ofs->config.xino = ovl_xino_def();

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-28 12:21           ` Miklos Szeredi
@ 2020-04-28 13:15             ` Amir Goldstein
  2020-04-28 13:32               ` Miklos Szeredi
  0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-28 13:15 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Chengguang Xu, overlayfs

On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, Apr 24, 2020 at 05:49:00PM +0300, Amir Goldstein wrote:
>
> > I didn't mean we need to check if link_max  is valid range for upper fs.
> > We anyway use minimum of user requested value and upper fs max.
> >
> > Frankly, I think there are only two sane options for system wide configuration:
> > 1. disable whiteout link
> > 2. enable whiteout link with ofs->workdir->d_sb->s_max_links
>
> And that one doesn't work, see for example ext4, which defines EXT4_LINK_MAX but
> doesn't set s_max_links.  This could be fixed, but using EMLINK to detect the
> max-link condition is simpler and more reliable.
>
> And I don't really see a reason to disable whiteout hard links.  What scenario
> would that be useful in?

I have a vague memory of e2fsck excessive memory consumption
in face of many hardlinks created by rsync backups.
Now I suppose it was a function of number of files with nlink > 1 and not
a function of nlink itself and could be a non issue for a long time, but I am
just being careful about introducing non-standard setups which may end up
exposing filesystem corner case bugs (near the edge of s_max_links).
Yeh that is very defensive, so I don't mind ignoring that concern and addressing
it in case somebody shouts.

>
> Updated patch below.  Changes from v5:
>
>  - fix a missing dput on shutdown
>  - don't poass workdir to ovl_cleanup_and_whiteout/ovl_whiteout
>  - flatten out retry loop in ovl_whiteout
>  - use EMLINK to distinguish max-links from misc failure
>
> Thanks,
> Miklos
>
> ---
> From: Chengguang Xu <cgxu519@mykernel.net>
> Subject: ovl: whiteout inode sharing
> Date: Fri, 24 Apr 2020 10:55:17 +0800
>
> Share inode with different whiteout files for saving inode and speeding up
> delete operation.
>
> If EMLINK is encountered when linking a shared whiteout, create a new one.
> In case of any other error, disable sharing for this super block.
>
> Note: ofs->whiteout is protected by inode lock on workdir.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/overlayfs/dir.c       |   49 +++++++++++++++++++++++++++++++++++------------
>  fs/overlayfs/overlayfs.h |    2 -
>  fs/overlayfs/ovl_entry.h |    3 ++
>  fs/overlayfs/readdir.c   |    2 -
>  fs/overlayfs/super.c     |    4 +++
>  fs/overlayfs/util.c      |    3 +-
>  6 files changed, 48 insertions(+), 15 deletions(-)
>
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -62,35 +62,59 @@ struct dentry *ovl_lookup_temp(struct de
>  }
>
>  /* caller holds i_mutex on workdir */
> -static struct dentry *ovl_whiteout(struct dentry *workdir)
> +static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
>  {
>         int err;
>         struct dentry *whiteout;
> +       struct dentry *workdir = ofs->workdir;
>         struct inode *wdir = workdir->d_inode;
>
> -       whiteout = ovl_lookup_temp(workdir);
> -       if (IS_ERR(whiteout))
> -               return whiteout;
> +       if (!ofs->whiteout) {
> +               whiteout = ovl_lookup_temp(workdir);
> +               if (IS_ERR(whiteout))
> +                       return whiteout;
> +
> +               err = ovl_do_whiteout(wdir, whiteout);
> +               if (err) {
> +                       dput(whiteout);
> +                       return ERR_PTR(err);
> +               }
> +               ofs->whiteout = whiteout;
> +       }
>
> -       err = ovl_do_whiteout(wdir, whiteout);
> -       if (err) {
> +       if (ofs->share_whiteout) {
> +               whiteout = ovl_lookup_temp(workdir);
> +               if (IS_ERR(whiteout))
> +                       goto fallback;
> +
> +               err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> +               if (!err)
> +                       goto success;
> +
> +               if (err != -EMLINK) {
> +                       pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> +                               ofs->whiteout->d_inode->i_nlink, err);
> +                       ofs->share_whiteout = false;
> +               }
>                 dput(whiteout);
> -               whiteout = ERR_PTR(err);
>         }
> -
> +fallback:
> +       whiteout = ofs->whiteout;
> +       ofs->whiteout = NULL;
> +success:

This label is a bit strange, but fine.

>         return whiteout;
>  }
>

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-28 13:15             ` Amir Goldstein
@ 2020-04-28 13:32               ` Miklos Szeredi
  2020-04-28 15:15                 ` Amir Goldstein
  0 siblings, 1 reply; 10+ messages in thread
From: Miklos Szeredi @ 2020-04-28 13:32 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Chengguang Xu, overlayfs

On Tue, Apr 28, 2020 at 3:16 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:

> > And I don't really see a reason to disable whiteout hard links.  What scenario
> > would that be useful in?
>
> I have a vague memory of e2fsck excessive memory consumption
> in face of many hardlinks created by rsync backups.
> Now I suppose it was a function of number of files with nlink > 1 and not
> a function of nlink itself and could be a non issue for a long time, but I am
> just being careful about introducing non-standard setups which may end up
> exposing filesystem corner case bugs (near the edge of s_max_links).
> Yeh that is very defensive, so I don't mind ignoring that concern and addressing
> it in case somebody shouts.

Right, and even if such a corner case bug exists, it's still primarily
a bug in the underlying filesystem and should be fixed there. A
workaround in overlay would only make sense if for some reason the
primary fix is difficult or impossible.

> > +fallback:
> > +       whiteout = ofs->whiteout;
> > +       ofs->whiteout = NULL;
> > +success:
>
> This label is a bit strange, but fine.

Agreed, changed to "out:"

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v4] ovl: whiteout inode sharing
  2020-04-28 13:32               ` Miklos Szeredi
@ 2020-04-28 15:15                 ` Amir Goldstein
  0 siblings, 0 replies; 10+ messages in thread
From: Amir Goldstein @ 2020-04-28 15:15 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: Chengguang Xu, overlayfs

On Tue, Apr 28, 2020 at 4:32 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Tue, Apr 28, 2020 at 3:16 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> > > And I don't really see a reason to disable whiteout hard links.  What scenario
> > > would that be useful in?
> >
> > I have a vague memory of e2fsck excessive memory consumption
> > in face of many hardlinks created by rsync backups.
> > Now I suppose it was a function of number of files with nlink > 1 and not
> > a function of nlink itself and could be a non issue for a long time, but I am
> > just being careful about introducing non-standard setups which may end up
> > exposing filesystem corner case bugs (near the edge of s_max_links).
> > Yeh that is very defensive, so I don't mind ignoring that concern and addressing
> > it in case somebody shouts.
>
> Right, and even if such a corner case bug exists, it's still primarily
> a bug in the underlying filesystem and should be fixed there. A
> workaround in overlay would only make sense if for some reason the
> primary fix is difficult or impossible.
>

Sure.

> > > +fallback:
> > > +       whiteout = ofs->whiteout;
> > > +       ofs->whiteout = NULL;
> > > +success:
> >
> > This label is a bit strange, but fine.
>
> Agreed, changed to "out:"
>

I meant no reason to have goto label when you can just return whiteout,
but it's fine either way.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-04-28 15:15 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-22 10:27 [PATCH v4] ovl: whiteout inode sharing Chengguang Xu
2020-04-22 11:50 ` Amir Goldstein
2020-04-23  1:17   ` Chengguang Xu
2020-04-24  6:02     ` Amir Goldstein
2020-04-24  6:26       ` Chengguang Xu
2020-04-24 14:49         ` Amir Goldstein
2020-04-28 12:21           ` Miklos Szeredi
2020-04-28 13:15             ` Amir Goldstein
2020-04-28 13:32               ` Miklos Szeredi
2020-04-28 15:15                 ` Amir Goldstein

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.