* [PATCH v4] ovl: whiteout inode sharing
@ 2020-04-22 10:27 Chengguang Xu
2020-04-22 11:50 ` Amir Goldstein
0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-22 10:27 UTC (permalink / raw)
To: miklos, amir73il; +Cc: linux-unionfs, Chengguang Xu
Sharing inode with different whiteout files for saving
inode and speeding up deleting operation.
Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
---
v1->v2:
- Address Amir's comments in v1.
v2->v3:
- Address Amir's comments in v2.
- Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
- Keep at most one whiteout tmpfile in work dir.
v3->v4:
- Disable the feature after link failure.
- Add mount option(whiteout link max) for overlayfs instance.
fs/overlayfs/dir.c | 47 ++++++++++++++++++++++++++++++++++------
fs/overlayfs/overlayfs.h | 10 +++++++--
fs/overlayfs/ovl_entry.h | 5 +++++
fs/overlayfs/readdir.c | 3 ++-
fs/overlayfs/super.c | 24 ++++++++++++++++++++
fs/overlayfs/util.c | 3 ++-
6 files changed, 81 insertions(+), 11 deletions(-)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 279009dee366..8b7d8854f31f 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
}
/* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
{
int err;
+ bool retried = false;
+ bool should_link = (ofs->whiteout_link_max > 1);
struct dentry *whiteout;
struct inode *wdir = workdir->d_inode;
+retry:
whiteout = ovl_lookup_temp(workdir);
if (IS_ERR(whiteout))
return whiteout;
+ err = 0;
+ if (should_link) {
+ if (ovl_whiteout_linkable(ofs)) {
+ err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+ if (!err)
+ return whiteout;
+ } else if (ofs->whiteout) {
+ dput(whiteout);
+ whiteout = ofs->whiteout;
+ ofs->whiteout = NULL;
+ return whiteout;
+ }
+
+ if (err) {
+ pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+ ofs->whiteout->d_inode->i_nlink, err);
+ ofs->whiteout_link_max = 0;
+ should_link = false;
+ ovl_cleanup(wdir, ofs->whiteout);
+ dput(ofs->whiteout);
+ ofs->whiteout = NULL;
+ }
+ }
+
err = ovl_do_whiteout(wdir, whiteout);
if (err) {
dput(whiteout);
- whiteout = ERR_PTR(err);
+ return ERR_PTR(err);
}
- return whiteout;
+ if (!should_link || retried)
+ return whiteout;
+
+ ofs->whiteout = whiteout;
+ retried = true;
+ goto retry;
}
/* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
- struct dentry *dentry)
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+ struct inode *dir, struct dentry *dentry)
{
struct inode *wdir = workdir->d_inode;
struct dentry *whiteout;
int err;
int flags = 0;
- whiteout = ovl_whiteout(workdir);
+ whiteout = ovl_whiteout(ofs, workdir);
err = PTR_ERR(whiteout);
if (IS_ERR(whiteout))
return err;
@@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
static int ovl_remove_and_whiteout(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct dentry *upper;
@@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
goto out_dput_upper;
}
- err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+ err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
if (err)
goto out_d_drop;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e00b1ff6dea9..3b127c997a6d 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
}
+static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
+{
+ return (ofs->whiteout &&
+ ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
+}
+
/* util.c */
int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
@@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
- struct dentry *dentry);
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
+ struct inode *dir, struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
umode_t mode;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 5762d802fe01..c805c35e0594 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -17,6 +17,7 @@ struct ovl_config {
bool nfs_export;
int xino;
bool metacopy;
+ unsigned int whiteout_link_max;
};
struct ovl_sb {
@@ -77,6 +78,10 @@ struct ovl_fs {
int xino_mode;
/* For allocation of non-persistent inode numbers */
atomic_long_t last_ino;
+ /* Whiteout dentry cache */
+ struct dentry *whiteout;
+ /* Whiteout max link count */
+ unsigned int whiteout_link_max;
};
static inline struct ovl_fs *OVL_FS(struct super_block *sb)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 20f5310d3ee4..bf22fb7792c1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
* Whiteout orphan index to block future open by
* handle after overlay nlink dropped to zero.
*/
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
+ index);
} else {
/* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index f57aa348dcd6..6bccab4d5596 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -26,6 +26,10 @@ struct ovl_dir_cache;
#define OVL_MAX_STACK 500
+static unsigned int ovl_whiteout_link_max_def = 60000;
+module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
+MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
+
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
@@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
iput(ofs->upperdir_trap);
dput(ofs->indexdir);
dput(ofs->workdir);
+ dput(ofs->whiteout);
if (ofs->workdir_locked)
ovl_inuse_unlock(ofs->workbasedir);
dput(ofs->workbasedir);
@@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s",
ofs->config.metacopy ? "on" : "off");
+ if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
+ seq_printf(m, ",whiteout_link_max=%u",
+ ofs->config.whiteout_link_max);
+
return 0;
}
@@ -398,6 +407,7 @@ enum {
OPT_XINO_AUTO,
OPT_METACOPY_ON,
OPT_METACOPY_OFF,
+ OPT_WHITEOUT_LINK_MAX,
OPT_ERR,
};
@@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
{OPT_XINO_AUTO, "xino=auto"},
{OPT_METACOPY_ON, "metacopy=on"},
{OPT_METACOPY_OFF, "metacopy=off"},
+ {OPT_WHITEOUT_LINK_MAX, "whiteout_link_max=%u"},
{OPT_ERR, NULL}
};
@@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
char *p;
int err;
+ int link_max;
bool metacopy_opt = false, redirect_opt = false;
bool nfs_export_opt = false, index_opt = false;
@@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
metacopy_opt = true;
break;
+ case OPT_WHITEOUT_LINK_MAX:
+ if (match_int(&args[0], &link_max))
+ return -EINVAL;
+ if (link_max < ovl_whiteout_link_max_def)
+ config->whiteout_link_max = link_max;
+ break;
+
default:
pr_err("unrecognized mount option \"%s\" or missing value\n",
p);
@@ -1269,6 +1288,10 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
ofs->config.nfs_export = false;
}
+
+ ofs->whiteout_link_max = min_not_zero(
+ ofs->workdir->d_sb->s_max_links,
+ ofs->config.whiteout_link_max ?: 1);
out:
mnt_drop_write(mnt);
return err;
@@ -1768,6 +1791,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
+ ofs->config.whiteout_link_max = ovl_whiteout_link_max_def;
err = ovl_parse_opt((char *) data, &ofs->config);
if (err)
goto out_err;
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 36b60788ee47..18df65ee81a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -669,6 +669,7 @@ bool ovl_need_index(struct dentry *dentry)
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
+ struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -707,7 +708,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, indexdir, dir, index);
} else {
/* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
--
2.20.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-22 10:27 [PATCH v4] ovl: whiteout inode sharing Chengguang Xu
@ 2020-04-22 11:50 ` Amir Goldstein
2020-04-23 1:17 ` Chengguang Xu
0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-22 11:50 UTC (permalink / raw)
To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs
On Wed, Apr 22, 2020 at 1:28 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> Sharing inode with different whiteout files for saving
> inode and speeding up deleting operation.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Just one question...
> ---
> v1->v2:
> - Address Amir's comments in v1.
>
> v2->v3:
> - Address Amir's comments in v2.
> - Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
> - Keep at most one whiteout tmpfile in work dir.
>
> v3->v4:
> - Disable the feature after link failure.
> - Add mount option(whiteout link max) for overlayfs instance.
>
> fs/overlayfs/dir.c | 47 ++++++++++++++++++++++++++++++++++------
> fs/overlayfs/overlayfs.h | 10 +++++++--
> fs/overlayfs/ovl_entry.h | 5 +++++
> fs/overlayfs/readdir.c | 3 ++-
> fs/overlayfs/super.c | 24 ++++++++++++++++++++
> fs/overlayfs/util.c | 3 ++-
> 6 files changed, 81 insertions(+), 11 deletions(-)
>
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 279009dee366..8b7d8854f31f 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
> }
>
> /* caller holds i_mutex on workdir */
> -static struct dentry *ovl_whiteout(struct dentry *workdir)
> +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
> {
> int err;
> + bool retried = false;
> + bool should_link = (ofs->whiteout_link_max > 1);
> struct dentry *whiteout;
> struct inode *wdir = workdir->d_inode;
>
> +retry:
> whiteout = ovl_lookup_temp(workdir);
> if (IS_ERR(whiteout))
> return whiteout;
>
> + err = 0;
> + if (should_link) {
> + if (ovl_whiteout_linkable(ofs)) {
> + err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> + if (!err)
> + return whiteout;
> + } else if (ofs->whiteout) {
> + dput(whiteout);
> + whiteout = ofs->whiteout;
> + ofs->whiteout = NULL;
> + return whiteout;
> + }
> +
> + if (err) {
> + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> + ofs->whiteout->d_inode->i_nlink, err);
> + ofs->whiteout_link_max = 0;
> + should_link = false;
> + ovl_cleanup(wdir, ofs->whiteout);
> + dput(ofs->whiteout);
> + ofs->whiteout = NULL;
> + }
> + }
> +
> err = ovl_do_whiteout(wdir, whiteout);
> if (err) {
> dput(whiteout);
> - whiteout = ERR_PTR(err);
> + return ERR_PTR(err);
> }
>
> - return whiteout;
> + if (!should_link || retried)
> + return whiteout;
> +
> + ofs->whiteout = whiteout;
> + retried = true;
> + goto retry;
> }
>
> /* Caller must hold i_mutex on both workdir and dir */
> -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> - struct dentry *dentry)
> +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> + struct inode *dir, struct dentry *dentry)
> {
> struct inode *wdir = workdir->d_inode;
> struct dentry *whiteout;
> int err;
> int flags = 0;
>
> - whiteout = ovl_whiteout(workdir);
> + whiteout = ovl_whiteout(ofs, workdir);
> err = PTR_ERR(whiteout);
> if (IS_ERR(whiteout))
> return err;
> @@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
> static int ovl_remove_and_whiteout(struct dentry *dentry,
> struct list_head *list)
> {
> + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> struct dentry *workdir = ovl_workdir(dentry);
> struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
> struct dentry *upper;
> @@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
> goto out_dput_upper;
> }
>
> - err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
> + err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
> if (err)
> goto out_d_drop;
>
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index e00b1ff6dea9..3b127c997a6d 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
> return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
> }
>
> +static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
> +{
> + return (ofs->whiteout &&
> + ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
> +}
> +
> /* util.c */
> int ovl_want_write(struct dentry *dentry);
> void ovl_drop_write(struct dentry *dentry);
> @@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
>
> /* dir.c */
> extern const struct inode_operations ovl_dir_inode_operations;
> -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> - struct dentry *dentry);
> +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> + struct inode *dir, struct dentry *dentry);
> struct ovl_cattr {
> dev_t rdev;
> umode_t mode;
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index 5762d802fe01..c805c35e0594 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -17,6 +17,7 @@ struct ovl_config {
> bool nfs_export;
> int xino;
> bool metacopy;
> + unsigned int whiteout_link_max;
> };
>
> struct ovl_sb {
> @@ -77,6 +78,10 @@ struct ovl_fs {
> int xino_mode;
> /* For allocation of non-persistent inode numbers */
> atomic_long_t last_ino;
> + /* Whiteout dentry cache */
> + struct dentry *whiteout;
> + /* Whiteout max link count */
> + unsigned int whiteout_link_max;
> };
>
> static inline struct ovl_fs *OVL_FS(struct super_block *sb)
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 20f5310d3ee4..bf22fb7792c1 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
> * Whiteout orphan index to block future open by
> * handle after overlay nlink dropped to zero.
> */
> - err = ovl_cleanup_and_whiteout(indexdir, dir, index);
> + err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
> + index);
> } else {
> /* Cleanup orphan index entries */
> err = ovl_cleanup(dir, index);
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index f57aa348dcd6..6bccab4d5596 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -26,6 +26,10 @@ struct ovl_dir_cache;
>
> #define OVL_MAX_STACK 500
>
> +static unsigned int ovl_whiteout_link_max_def = 60000;
> +module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
> +MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
> +
> static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
> module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
> MODULE_PARM_DESC(redirect_dir,
> @@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
> iput(ofs->upperdir_trap);
> dput(ofs->indexdir);
> dput(ofs->workdir);
> + dput(ofs->whiteout);
> if (ofs->workdir_locked)
> ovl_inuse_unlock(ofs->workbasedir);
> dput(ofs->workbasedir);
> @@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
> if (ofs->config.metacopy != ovl_metacopy_def)
> seq_printf(m, ",metacopy=%s",
> ofs->config.metacopy ? "on" : "off");
> + if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
> + seq_printf(m, ",whiteout_link_max=%u",
> + ofs->config.whiteout_link_max);
> +
> return 0;
> }
>
> @@ -398,6 +407,7 @@ enum {
> OPT_XINO_AUTO,
> OPT_METACOPY_ON,
> OPT_METACOPY_OFF,
> + OPT_WHITEOUT_LINK_MAX,
> OPT_ERR,
> };
>
> @@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
> {OPT_XINO_AUTO, "xino=auto"},
> {OPT_METACOPY_ON, "metacopy=on"},
> {OPT_METACOPY_OFF, "metacopy=off"},
> + {OPT_WHITEOUT_LINK_MAX, "whiteout_link_max=%u"},
> {OPT_ERR, NULL}
> };
>
> @@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
> {
> char *p;
> int err;
> + int link_max;
> bool metacopy_opt = false, redirect_opt = false;
> bool nfs_export_opt = false, index_opt = false;
>
> @@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
> metacopy_opt = true;
> break;
>
> + case OPT_WHITEOUT_LINK_MAX:
> + if (match_int(&args[0], &link_max))
> + return -EINVAL;
> + if (link_max < ovl_whiteout_link_max_def)
> + config->whiteout_link_max = link_max;
Why not allow link_max > ovl_whiteout_link_max_def?
admin may want to disable ovl_whiteout_link_max_def by default
in module parameter, but allow it for specific overlay instances.
In any case, if we do have a good reason to ignore user configurable
value we should pr_warn about it and explain to users what they
need to do to overcome the misconfiguration issue.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-22 11:50 ` Amir Goldstein
@ 2020-04-23 1:17 ` Chengguang Xu
2020-04-24 6:02 ` Amir Goldstein
0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-23 1:17 UTC (permalink / raw)
To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs
---- 在 星期三, 2020-04-22 19:50:30 Amir Goldstein <amir73il@gmail.com> 撰写 ----
> On Wed, Apr 22, 2020 at 1:28 PM Chengguang Xu <cgxu519@mykernel.net> wrote:
> >
> > Sharing inode with different whiteout files for saving
> > inode and speeding up deleting operation.
> >
> > Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
>
> Just one question...
>
> > ---
> > v1->v2:
> > - Address Amir's comments in v1.
> >
> > v2->v3:
> > - Address Amir's comments in v2.
> > - Rebase on Amir's "Overlayfs use index dir as work dir" patch set.
> > - Keep at most one whiteout tmpfile in work dir.
> >
> > v3->v4:
> > - Disable the feature after link failure.
> > - Add mount option(whiteout link max) for overlayfs instance.
> >
> > fs/overlayfs/dir.c | 47 ++++++++++++++++++++++++++++++++++------
> > fs/overlayfs/overlayfs.h | 10 +++++++--
> > fs/overlayfs/ovl_entry.h | 5 +++++
> > fs/overlayfs/readdir.c | 3 ++-
> > fs/overlayfs/super.c | 24 ++++++++++++++++++++
> > fs/overlayfs/util.c | 3 ++-
> > 6 files changed, 81 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> > index 279009dee366..8b7d8854f31f 100644
> > --- a/fs/overlayfs/dir.c
> > +++ b/fs/overlayfs/dir.c
> > @@ -62,35 +62,67 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir)
> > }
> >
> > /* caller holds i_mutex on workdir */
> > -static struct dentry *ovl_whiteout(struct dentry *workdir)
> > +static struct dentry *ovl_whiteout(struct ovl_fs *ofs, struct dentry *workdir)
> > {
> > int err;
> > + bool retried = false;
> > + bool should_link = (ofs->whiteout_link_max > 1);
> > struct dentry *whiteout;
> > struct inode *wdir = workdir->d_inode;
> >
> > +retry:
> > whiteout = ovl_lookup_temp(workdir);
> > if (IS_ERR(whiteout))
> > return whiteout;
> >
> > + err = 0;
> > + if (should_link) {
> > + if (ovl_whiteout_linkable(ofs)) {
> > + err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> > + if (!err)
> > + return whiteout;
> > + } else if (ofs->whiteout) {
> > + dput(whiteout);
> > + whiteout = ofs->whiteout;
> > + ofs->whiteout = NULL;
> > + return whiteout;
> > + }
> > +
> > + if (err) {
> > + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> > + ofs->whiteout->d_inode->i_nlink, err);
> > + ofs->whiteout_link_max = 0;
> > + should_link = false;
> > + ovl_cleanup(wdir, ofs->whiteout);
> > + dput(ofs->whiteout);
> > + ofs->whiteout = NULL;
> > + }
> > + }
> > +
> > err = ovl_do_whiteout(wdir, whiteout);
> > if (err) {
> > dput(whiteout);
> > - whiteout = ERR_PTR(err);
> > + return ERR_PTR(err);
> > }
> >
> > - return whiteout;
> > + if (!should_link || retried)
> > + return whiteout;
> > +
> > + ofs->whiteout = whiteout;
> > + retried = true;
> > + goto retry;
> > }
> >
> > /* Caller must hold i_mutex on both workdir and dir */
> > -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> > - struct dentry *dentry)
> > +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> > + struct inode *dir, struct dentry *dentry)
> > {
> > struct inode *wdir = workdir->d_inode;
> > struct dentry *whiteout;
> > int err;
> > int flags = 0;
> >
> > - whiteout = ovl_whiteout(workdir);
> > + whiteout = ovl_whiteout(ofs, workdir);
> > err = PTR_ERR(whiteout);
> > if (IS_ERR(whiteout))
> > return err;
> > @@ -715,6 +747,7 @@ static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
> > static int ovl_remove_and_whiteout(struct dentry *dentry,
> > struct list_head *list)
> > {
> > + struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
> > struct dentry *workdir = ovl_workdir(dentry);
> > struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
> > struct dentry *upper;
> > @@ -748,7 +781,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
> > goto out_dput_upper;
> > }
> >
> > - err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
> > + err = ovl_cleanup_and_whiteout(ofs, workdir, d_inode(upperdir), upper);
> > if (err)
> > goto out_d_drop;
> >
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index e00b1ff6dea9..3b127c997a6d 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -225,6 +225,12 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
> > return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
> > }
> >
> > +static inline bool ovl_whiteout_linkable(struct ovl_fs *ofs)
> > +{
> > + return (ofs->whiteout &&
> > + ofs->whiteout->d_inode->i_nlink < ofs->whiteout_link_max);
> > +}
> > +
> > /* util.c */
> > int ovl_want_write(struct dentry *dentry);
> > void ovl_drop_write(struct dentry *dentry);
> > @@ -455,8 +461,8 @@ static inline void ovl_copyflags(struct inode *from, struct inode *to)
> >
> > /* dir.c */
> > extern const struct inode_operations ovl_dir_inode_operations;
> > -int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
> > - struct dentry *dentry);
> > +int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct dentry *workdir,
> > + struct inode *dir, struct dentry *dentry);
> > struct ovl_cattr {
> > dev_t rdev;
> > umode_t mode;
> > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> > index 5762d802fe01..c805c35e0594 100644
> > --- a/fs/overlayfs/ovl_entry.h
> > +++ b/fs/overlayfs/ovl_entry.h
> > @@ -17,6 +17,7 @@ struct ovl_config {
> > bool nfs_export;
> > int xino;
> > bool metacopy;
> > + unsigned int whiteout_link_max;
> > };
> >
> > struct ovl_sb {
> > @@ -77,6 +78,10 @@ struct ovl_fs {
> > int xino_mode;
> > /* For allocation of non-persistent inode numbers */
> > atomic_long_t last_ino;
> > + /* Whiteout dentry cache */
> > + struct dentry *whiteout;
> > + /* Whiteout max link count */
> > + unsigned int whiteout_link_max;
> > };
> >
> > static inline struct ovl_fs *OVL_FS(struct super_block *sb)
> > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> > index 20f5310d3ee4..bf22fb7792c1 100644
> > --- a/fs/overlayfs/readdir.c
> > +++ b/fs/overlayfs/readdir.c
> > @@ -1154,7 +1154,8 @@ int ovl_indexdir_cleanup(struct ovl_fs *ofs)
> > * Whiteout orphan index to block future open by
> > * handle after overlay nlink dropped to zero.
> > */
> > - err = ovl_cleanup_and_whiteout(indexdir, dir, index);
> > + err = ovl_cleanup_and_whiteout(ofs, indexdir, dir,
> > + index);
> > } else {
> > /* Cleanup orphan index entries */
> > err = ovl_cleanup(dir, index);
> > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> > index f57aa348dcd6..6bccab4d5596 100644
> > --- a/fs/overlayfs/super.c
> > +++ b/fs/overlayfs/super.c
> > @@ -26,6 +26,10 @@ struct ovl_dir_cache;
> >
> > #define OVL_MAX_STACK 500
> >
> > +static unsigned int ovl_whiteout_link_max_def = 60000;
> > +module_param_named(whiteout_link_max, ovl_whiteout_link_max_def, uint, 0644);
> > +MODULE_PARM_DESC(whiteout_link_max, "Maximum count of whiteout file link");
> > +
> > static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
> > module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
> > MODULE_PARM_DESC(redirect_dir,
> > @@ -219,6 +223,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
> > iput(ofs->upperdir_trap);
> > dput(ofs->indexdir);
> > dput(ofs->workdir);
> > + dput(ofs->whiteout);
> > if (ofs->workdir_locked)
> > ovl_inuse_unlock(ofs->workbasedir);
> > dput(ofs->workbasedir);
> > @@ -358,6 +363,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
> > if (ofs->config.metacopy != ovl_metacopy_def)
> > seq_printf(m, ",metacopy=%s",
> > ofs->config.metacopy ? "on" : "off");
> > + if (ofs->config.whiteout_link_max != ovl_whiteout_link_max_def)
> > + seq_printf(m, ",whiteout_link_max=%u",
> > + ofs->config.whiteout_link_max);
> > +
> > return 0;
> > }
> >
> > @@ -398,6 +407,7 @@ enum {
> > OPT_XINO_AUTO,
> > OPT_METACOPY_ON,
> > OPT_METACOPY_OFF,
> > + OPT_WHITEOUT_LINK_MAX,
> > OPT_ERR,
> > };
> >
> > @@ -416,6 +426,7 @@ static const match_table_t ovl_tokens = {
> > {OPT_XINO_AUTO, "xino=auto"},
> > {OPT_METACOPY_ON, "metacopy=on"},
> > {OPT_METACOPY_OFF, "metacopy=off"},
> > + {OPT_WHITEOUT_LINK_MAX, "whiteout_link_max=%u"},
> > {OPT_ERR, NULL}
> > };
> >
> > @@ -469,6 +480,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
> > {
> > char *p;
> > int err;
> > + int link_max;
> > bool metacopy_opt = false, redirect_opt = false;
> > bool nfs_export_opt = false, index_opt = false;
> >
> > @@ -560,6 +572,13 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
> > metacopy_opt = true;
> > break;
> >
> > + case OPT_WHITEOUT_LINK_MAX:
> > + if (match_int(&args[0], &link_max))
> > + return -EINVAL;
> > + if (link_max < ovl_whiteout_link_max_def)
> > + config->whiteout_link_max = link_max;
>
> Why not allow link_max > ovl_whiteout_link_max_def?
> admin may want to disable ovl_whiteout_link_max_def by default
> in module parameter, but allow it for specific overlay instances.
>
In this use case, seems we don't need module param any more, we just need to set default value for option.
I would like to treat the module param as a total switch, so that it could disable the feature for all instances at the same time.
I think sometimes it's helpful for lazy admin(like me).
> In any case, if we do have a good reason to ignore user configurable
> value we should pr_warn about it and explain to users what they
> need to do to overcome the misconfiguration issue.
>
Agree, let me add a warn for this case.
Thanks,
cgxu
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-23 1:17 ` Chengguang Xu
@ 2020-04-24 6:02 ` Amir Goldstein
2020-04-24 6:26 ` Chengguang Xu
0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-24 6:02 UTC (permalink / raw)
To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs
> > > + case OPT_WHITEOUT_LINK_MAX:
> > > + if (match_int(&args[0], &link_max))
> > > + return -EINVAL;
> > > + if (link_max < ovl_whiteout_link_max_def)
> > > + config->whiteout_link_max = link_max;
> >
> > Why not allow link_max > ovl_whiteout_link_max_def?
> > admin may want to disable ovl_whiteout_link_max_def by default
> > in module parameter, but allow it for specific overlay instances.
> >
>
> In this use case, seems we don't need module param any more, we just need to set default value for option.
>
> I would like to treat the module param as a total switch, so that it could disable the feature for all instances at the same time.
> I think sometimes it's helpful for lazy admin(like me).
>
I am not convinced.
lazy admin could very well want to disable whiteout_link_max by default,
but allow user to specify whiteout_link_max for a specific mount.
In fact, in order to preserve existing behavior and not cause regression with
some special filesystems, distros could decide that default disabled is
a reasonable choice.
I don't understand at all what the purpose of this limitation is.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-24 6:02 ` Amir Goldstein
@ 2020-04-24 6:26 ` Chengguang Xu
2020-04-24 14:49 ` Amir Goldstein
0 siblings, 1 reply; 10+ messages in thread
From: Chengguang Xu @ 2020-04-24 6:26 UTC (permalink / raw)
To: Amir Goldstein; +Cc: Miklos Szeredi, overlayfs
---- 在 星期五, 2020-04-24 14:02:00 Amir Goldstein <amir73il@gmail.com> 撰写 ----
> > > > + case OPT_WHITEOUT_LINK_MAX:
> > > > + if (match_int(&args[0], &link_max))
> > > > + return -EINVAL;
> > > > + if (link_max < ovl_whiteout_link_max_def)
> > > > + config->whiteout_link_max = link_max;
> > >
> > > Why not allow link_max > ovl_whiteout_link_max_def?
> > > admin may want to disable ovl_whiteout_link_max_def by default
> > > in module parameter, but allow it for specific overlay instances.
> > >
> >
> > In this use case, seems we don't need module param any more, we just need to set default value for option.
> >
> > I would like to treat the module param as a total switch, so that it could disable the feature for all instances at the same time.
> > I think sometimes it's helpful for lazy admin(like me).
> >
>
> I am not convinced.
>
> lazy admin could very well want to disable whiteout_link_max by default,
> but allow user to specify whiteout_link_max for a specific mount.
>
> In fact, in order to preserve existing behavior and not cause regression with
> some special filesystems, distros could decide that default disabled is
> a reasonable choice.
>
> I don't understand at all what the purpose of this limitation is.
>
If a user sets a ridiculous link_max which is larger than the valid range of the upper fs, I think it is hard to verify at the option-parsing stage.
So I hope to fix the upper limit using module parameter, we can set default mount option to 0/1 for the use case you mentioned above.
Thanks,
cgxu
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-24 6:26 ` Chengguang Xu
@ 2020-04-24 14:49 ` Amir Goldstein
2020-04-28 12:21 ` Miklos Szeredi
0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-24 14:49 UTC (permalink / raw)
To: Chengguang Xu; +Cc: Miklos Szeredi, overlayfs
On Fri, Apr 24, 2020 at 9:26 AM Chengguang Xu <cgxu519@mykernel.net> wrote:
>
> ---- 在 星期五, 2020-04-24 14:02:00 Amir Goldstein <amir73il@gmail.com> 撰写 ----
> > > > > + case OPT_WHITEOUT_LINK_MAX:
> > > > > + if (match_int(&args[0], &link_max))
> > > > > + return -EINVAL;
> > > > > + if (link_max < ovl_whiteout_link_max_def)
> > > > > + config->whiteout_link_max = link_max;
> > > >
> > > > Why not allow link_max > ovl_whiteout_link_max_def?
> > > > admin may want to disable ovl_whiteout_link_max_def by default
> > > > in module parameter, but allow it for specific overlay instances.
> > > >
> > >
> > > In this use case, seems we don't need module param any more, we just need to set default value for option.
> > >
> > > I would like to treat module param as a total switch, so that it could disable the feature for all instances at the same time.
> > > I think sometimes it's helpful for lazy admin(like me).
> > >
> >
> > I am not convinced.
> >
> > lazy admin could very well want to disable whiteout_link_max by default,
> > but allow user to specify whiteout_link_max for a specific mount.
> >
> > In fact, in order to preserve existing behavior and not cause regression with
> > some special filesystems, distros could decide that default disabled is
> > a reasonable choice.
> >
> > I don't understand at all what the purpose of this limitation is.
> >
>
> If user sets a ridiculous link_max which is larger than valid range of upper fs, I think it is hard to verify in the stage of option parsing.
> So I hope to fix the upper limit using module parameter, we can set default mount option to 0/1 for the use case you mentioned above.
>
I didn't mean we need to check if link_max is valid range for upper fs.
We anyway use minimum of user requested value and upper fs max.
Frankly, I think there are only two sane options for system wide configuration:
1. disable whiteout link
2. enable whiteout link with ofs->workdir->d_sb->s_max_links
So perhaps the module param should be a boolean ovl_whiteout_link_def?
Perhaps Kconfig should determine the build time default.
Setting whiteout_link smaller than d_sb->s_max_links should be
possible via mount option.
We may want to support the mount options:
whiteout_link_max=<N>
whiteout_link_max=auto
It should be simple to parse whiteout_link_max=auto, just
set config->whiteout_link_max to max uint and let later code
reduce it to upper fs max.
For ovl_show_options() it is slightly more complicated to get right.
I am not hooked on any of the ideas above, but I find the current
configuration options in v4/v5 not good enough.
As an exercise you can try to document those options and
see how clear the text is.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-24 14:49 ` Amir Goldstein
@ 2020-04-28 12:21 ` Miklos Szeredi
2020-04-28 13:15 ` Amir Goldstein
0 siblings, 1 reply; 10+ messages in thread
From: Miklos Szeredi @ 2020-04-28 12:21 UTC (permalink / raw)
To: Amir Goldstein; +Cc: Chengguang Xu, overlayfs
On Fri, Apr 24, 2020 at 05:49:00PM +0300, Amir Goldstein wrote:
> I didn't mean we need to check if link_max is valid range for upper fs.
> We anyway use minimum of user requested value and upper fs max.
>
> Frankly, I think there are only two sane options for system wide configuration:
> 1. disable whiteout link
> 2. enable whiteout link with ofs->workdir->d_sb->s_max_links
And that one doesn't work, see for example ext4, which defines EXT4_LINK_MAX but
doesn't set s_max_links. This could be fixed, but using EMLINK to detect the
max-link condition is simpler and more reliable.
And I don't really see a reason to disable whiteout hard links. What scenario
would that be useful in?
Updated patch below. Changes from v5:
- fix a missing dput on shutdown
- don't pass workdir to ovl_cleanup_and_whiteout/ovl_whiteout
- flatten out retry loop in ovl_whiteout
- use EMLINK to distinguish max-links from misc failure
Thanks,
Miklos
---
From: Chengguang Xu <cgxu519@mykernel.net>
Subject: ovl: whiteout inode sharing
Date: Fri, 24 Apr 2020 10:55:17 +0800
Share inode with different whiteout files for saving inode and speeding up
delete operation.
If EMLINK is encountered when linking a shared whiteout, create a new one.
In case of any other error, disable sharing for this super block.
Note: ofs->whiteout is protected by inode lock on workdir.
Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
fs/overlayfs/dir.c | 49 +++++++++++++++++++++++++++++++++++------------
fs/overlayfs/overlayfs.h | 2 -
fs/overlayfs/ovl_entry.h | 3 ++
fs/overlayfs/readdir.c | 2 -
fs/overlayfs/super.c | 4 +++
fs/overlayfs/util.c | 3 +-
6 files changed, 48 insertions(+), 15 deletions(-)
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -62,35 +62,59 @@ struct dentry *ovl_lookup_temp(struct de
}
/* caller holds i_mutex on workdir */
-static struct dentry *ovl_whiteout(struct dentry *workdir)
+static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
int err;
struct dentry *whiteout;
+ struct dentry *workdir = ofs->workdir;
struct inode *wdir = workdir->d_inode;
- whiteout = ovl_lookup_temp(workdir);
- if (IS_ERR(whiteout))
- return whiteout;
+ if (!ofs->whiteout) {
+ whiteout = ovl_lookup_temp(workdir);
+ if (IS_ERR(whiteout))
+ return whiteout;
+
+ err = ovl_do_whiteout(wdir, whiteout);
+ if (err) {
+ dput(whiteout);
+ return ERR_PTR(err);
+ }
+ ofs->whiteout = whiteout;
+ }
- err = ovl_do_whiteout(wdir, whiteout);
- if (err) {
+ if (ofs->share_whiteout) {
+ whiteout = ovl_lookup_temp(workdir);
+ if (IS_ERR(whiteout))
+ goto fallback;
+
+ err = ovl_do_link(ofs->whiteout, wdir, whiteout);
+ if (!err)
+ goto success;
+
+ if (err != -EMLINK) {
+ pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
+ ofs->whiteout->d_inode->i_nlink, err);
+ ofs->share_whiteout = false;
+ }
dput(whiteout);
- whiteout = ERR_PTR(err);
}
-
+fallback:
+ whiteout = ofs->whiteout;
+ ofs->whiteout = NULL;
+success:
return whiteout;
}
/* Caller must hold i_mutex on both workdir and dir */
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
struct dentry *dentry)
{
- struct inode *wdir = workdir->d_inode;
+ struct inode *wdir = ofs->workdir->d_inode;
struct dentry *whiteout;
int err;
int flags = 0;
- whiteout = ovl_whiteout(workdir);
+ whiteout = ovl_whiteout(ofs);
err = PTR_ERR(whiteout);
if (IS_ERR(whiteout))
return err;
@@ -715,6 +739,7 @@ static bool ovl_matches_upper(struct den
static int ovl_remove_and_whiteout(struct dentry *dentry,
struct list_head *list)
{
+ struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *workdir = ovl_workdir(dentry);
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct dentry *upper;
@@ -748,7 +773,7 @@ static int ovl_remove_and_whiteout(struc
goto out_dput_upper;
}
- err = ovl_cleanup_and_whiteout(workdir, d_inode(upperdir), upper);
+ err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
if (err)
goto out_d_drop;
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -455,7 +455,7 @@ static inline void ovl_copyflags(struct
/* dir.c */
extern const struct inode_operations ovl_dir_inode_operations;
-int ovl_cleanup_and_whiteout(struct dentry *workdir, struct inode *dir,
+int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
struct dentry *dentry);
struct ovl_cattr {
dev_t rdev;
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -68,6 +68,7 @@ struct ovl_fs {
/* Did we take the inuse lock? */
bool upperdir_locked;
bool workdir_locked;
+ bool share_whiteout;
/* Traps in ovl inode cache */
struct inode *upperdir_trap;
struct inode *workbasedir_trap;
@@ -77,6 +78,8 @@ struct ovl_fs {
int xino_mode;
/* For allocation of non-persistent inode numbers */
atomic_long_t last_ino;
+ /* Whiteout dentry cache */
+ struct dentry *whiteout;
};
static inline struct ovl_fs *OVL_FS(struct super_block *sb)
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -1154,7 +1154,7 @@ int ovl_indexdir_cleanup(struct ovl_fs *
* Whiteout orphan index to block future open by
* handle after overlay nlink dropped to zero.
*/
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(ofs, dir, index);
} else {
/* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -707,7 +707,8 @@ static void ovl_cleanup_index(struct den
index = NULL;
} else if (ovl_index_all(dentry->d_sb)) {
/* Whiteout orphan index to block future open by handle */
- err = ovl_cleanup_and_whiteout(indexdir, dir, index);
+ err = ovl_cleanup_and_whiteout(OVL_FS(dentry->d_sb),
+ dir, index);
} else {
/* Cleanup orphan index entries */
err = ovl_cleanup(dir, index);
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -217,6 +217,7 @@ static void ovl_free_fs(struct ovl_fs *o
iput(ofs->indexdir_trap);
iput(ofs->workdir_trap);
iput(ofs->upperdir_trap);
+ dput(ofs->whiteout);
dput(ofs->indexdir);
dput(ofs->workdir);
if (ofs->workdir_locked)
@@ -1776,6 +1777,9 @@ static int ovl_fill_super(struct super_b
if (!cred)
goto out_err;
+ /* Is there a reason anyone would want not to share whiteouts? */
+ ofs->share_whiteout = true;
+
ofs->config.index = ovl_index_def;
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-28 12:21 ` Miklos Szeredi
@ 2020-04-28 13:15 ` Amir Goldstein
2020-04-28 13:32 ` Miklos Szeredi
0 siblings, 1 reply; 10+ messages in thread
From: Amir Goldstein @ 2020-04-28 13:15 UTC (permalink / raw)
To: Miklos Szeredi; +Cc: Chengguang Xu, overlayfs
On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, Apr 24, 2020 at 05:49:00PM +0300, Amir Goldstein wrote:
>
> > I didn't mean we need to check if link_max is valid range for upper fs.
> > We anyway use minimum of user requested value and upper fs max.
> >
> > Frankly, I think there are only two sane options for system wide configuration:
> > 1. disable whiteout link
> > 2. enable whiteout link with ofs->workdir->d_sb->s_max_links
>
> And that one doesn't work, see for example ext4, which defines EXT4_LINK_MAX but
> doesn't set s_max_links. This could be fixed, but using EMLINK to detect the
> max-link condition is simpler and more reliable.
>
> And I don't really see a reason to disable whiteout hard links. What scenario
> would that be useful in?
I have a vague memory of e2fsck excessive memory consumption
in face of many hardlinks created by rsync backups.
Now I suppose it was a function of number of files with nlink > 1 and not
a function of nlink itself and could be a non issue for a long time, but I am
just being careful about introducing non-standard setups which may end up
exposing filesystem corner case bugs (near the edge of s_max_links).
Yeh that is very defensive, so I don't mind ignoring that concern and addressing
it in case somebody shouts.
>
> Updated patch below. Changes from v5:
>
> - fix a missing dput on shutdown
> - don't poass workdir to ovl_cleanup_and_whiteout/ovl_whiteout
> - flatten out retry loop in ovl_whiteout
> - use EMLINK to distinguish max-links from misc failure
>
> Thanks,
> Miklos
>
> ---
> From: Chengguang Xu <cgxu519@mykernel.net>
> Subject: ovl: whiteout inode sharing
> Date: Fri, 24 Apr 2020 10:55:17 +0800
>
> Share inode with different whiteout files for saving inode and speeding up
> delete operation.
>
> If EMLINK is encountered when linking a shared whiteout, create a new one.
> In case of any other error, disable sharing for this super block.
>
> Note: ofs->whiteout is protected by inode lock on workdir.
>
> Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
> Reviewed-by: Amir Goldstein <amir73il@gmail.com>
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> fs/overlayfs/dir.c | 49 +++++++++++++++++++++++++++++++++++------------
> fs/overlayfs/overlayfs.h | 2 -
> fs/overlayfs/ovl_entry.h | 3 ++
> fs/overlayfs/readdir.c | 2 -
> fs/overlayfs/super.c | 4 +++
> fs/overlayfs/util.c | 3 +-
> 6 files changed, 48 insertions(+), 15 deletions(-)
>
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -62,35 +62,59 @@ struct dentry *ovl_lookup_temp(struct de
> }
>
> /* caller holds i_mutex on workdir */
> -static struct dentry *ovl_whiteout(struct dentry *workdir)
> +static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
> {
> int err;
> struct dentry *whiteout;
> + struct dentry *workdir = ofs->workdir;
> struct inode *wdir = workdir->d_inode;
>
> - whiteout = ovl_lookup_temp(workdir);
> - if (IS_ERR(whiteout))
> - return whiteout;
> + if (!ofs->whiteout) {
> + whiteout = ovl_lookup_temp(workdir);
> + if (IS_ERR(whiteout))
> + return whiteout;
> +
> + err = ovl_do_whiteout(wdir, whiteout);
> + if (err) {
> + dput(whiteout);
> + return ERR_PTR(err);
> + }
> + ofs->whiteout = whiteout;
> + }
>
> - err = ovl_do_whiteout(wdir, whiteout);
> - if (err) {
> + if (ofs->share_whiteout) {
> + whiteout = ovl_lookup_temp(workdir);
> + if (IS_ERR(whiteout))
> + goto fallback;
> +
> + err = ovl_do_link(ofs->whiteout, wdir, whiteout);
> + if (!err)
> + goto success;
> +
> + if (err != -EMLINK) {
> + pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
> + ofs->whiteout->d_inode->i_nlink, err);
> + ofs->share_whiteout = false;
> + }
> dput(whiteout);
> - whiteout = ERR_PTR(err);
> }
> -
> +fallback:
> + whiteout = ofs->whiteout;
> + ofs->whiteout = NULL;
> +success:
This label is a bit strange, but fine.
> return whiteout;
> }
>
Thanks,
Amir.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-28 13:15 ` Amir Goldstein
@ 2020-04-28 13:32 ` Miklos Szeredi
2020-04-28 15:15 ` Amir Goldstein
0 siblings, 1 reply; 10+ messages in thread
From: Miklos Szeredi @ 2020-04-28 13:32 UTC (permalink / raw)
To: Amir Goldstein; +Cc: Chengguang Xu, overlayfs
On Tue, Apr 28, 2020 at 3:16 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
> > And I don't really see a reason to disable whiteout hard links. What scenario
> > would that be useful in?
>
> I have a vague memory of e2fsck excessive memory consumption
> in face of many hardlinks created by rsync backups.
> Now I suppose it was a function of number of files with nlink > 1 and not
> a function of nlink itself and could be a non issue for a long time, but I am
> just being careful about introducing non-standard setups which may end up
> exposing filesystem corner case bugs (near the edge of s_max_links).
> Yeh that is very defensive, so I don't mind ignoring that concern and addressing
> it in case somebody shouts.
Right, and even if such a corner case bug exists, it's still primarily
a bug in the underlying filesystem and should be fixed there. A
workaround in overlay would only make sense if for some reason the
primary fix is difficult or impossible.
> > +fallback:
> > + whiteout = ofs->whiteout;
> > + ofs->whiteout = NULL;
> > +success:
>
> This label is a bit strange, but fine.
Agreed, changed to "out:"
Thanks,
Miklos
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v4] ovl: whiteout inode sharing
2020-04-28 13:32 ` Miklos Szeredi
@ 2020-04-28 15:15 ` Amir Goldstein
0 siblings, 0 replies; 10+ messages in thread
From: Amir Goldstein @ 2020-04-28 15:15 UTC (permalink / raw)
To: Miklos Szeredi; +Cc: Chengguang Xu, overlayfs
On Tue, Apr 28, 2020 at 4:32 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Tue, Apr 28, 2020 at 3:16 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Tue, Apr 28, 2020 at 3:21 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> > > And I don't really see a reason to disable whiteout hard links. What scenario
> > > would that be useful in?
> >
> > I have a vague memory of e2fsck excessive memory consumption
> > in face of many hardlinks created by rsync backups.
> > Now I suppose it was a function of number of files with nlink > 1 and not
> > a function of nlink itself and could be a non issue for a long time, but I am
> > just being careful about introducing non-standard setups which may end up
> > exposing filesystem corner case bugs (near the edge of s_max_links).
> > Yeh that is very defensive, so I don't mind ignoring that concern and addressing
> > it in case somebody shouts.
>
> Right, and even if such a corner case bug exists, it's still primarily
> a bug in the underlying filesystem and should be fixed there. A
> workaround in overlay would only make sense if for some reason the
> primary fix is difficult or impossible.
>
Sure.
> > > +fallback:
> > > + whiteout = ofs->whiteout;
> > > + ofs->whiteout = NULL;
> > > +success:
> >
> > This label is a bit strange, but fine.
>
> Agreed, changed to "out:"
>
I meant no reason to have goto label when you can just return whiteout,
but it's fine either way.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2020-04-28 15:15 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-22 10:27 [PATCH v4] ovl: whiteout inode sharing Chengguang Xu
2020-04-22 11:50 ` Amir Goldstein
2020-04-23 1:17 ` Chengguang Xu
2020-04-24 6:02 ` Amir Goldstein
2020-04-24 6:26 ` Chengguang Xu
2020-04-24 14:49 ` Amir Goldstein
2020-04-28 12:21 ` Miklos Szeredi
2020-04-28 13:15 ` Amir Goldstein
2020-04-28 13:32 ` Miklos Szeredi
2020-04-28 15:15 ` Amir Goldstein
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.