All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] ovl: require xwhiteout feature flag on layer roots
@ 2024-01-19 10:14 Miklos Szeredi
  2024-01-19 11:08 ` Amir Goldstein
  0 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2024-01-19 10:14 UTC (permalink / raw)
  To: linux-unionfs; +Cc: Alexander Larsson, Amir Goldstein, linux-fsdevel, stable

Add a check on each lower layer for the xwhiteout feature.  This prevents
unnecessary checking the overlay.whiteouts xattr when reading a directory
if this feature is not enabled, i.e. most of the time.

Share the same xattr for the per-directory and the per-layer flag, which
has the effect that if this is enabled for a layer, then the optimization
to bypass checking of individual entries does not work on the root of the
layer.  This was deemed better, than having a separate xattr for the layer
and the directory.

Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout")
Cc: <stable@vger.kernel.org> # v6.7
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
v2:
 - use overlay.whiteouts instead of overlay.feature_xwhiteout
 - move initialization to ovl_get_layers()
 - xwhiteouts can only be enabled on lower layer

 fs/overlayfs/namei.c     | 10 +++++++---
 fs/overlayfs/overlayfs.h |  7 +++++--
 fs/overlayfs/ovl_entry.h |  2 ++
 fs/overlayfs/readdir.c   | 11 ++++++++---
 fs/overlayfs/super.c     | 13 +++++++++++++
 fs/overlayfs/util.c      |  7 ++++++-
 6 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 03bc8d5dfa31..583cf56df66e 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -863,7 +863,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
  * Returns next layer in stack starting from top.
  * Returns -1 if this is the last layer.
  */
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+		  const struct ovl_layer **layer)
 {
 	struct ovl_entry *oe = OVL_E(dentry);
 	struct ovl_path *lowerstack = ovl_lowerstack(oe);
@@ -871,13 +872,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
 	BUG_ON(idx < 0);
 	if (idx == 0) {
 		ovl_path_upper(dentry, path);
-		if (path->dentry)
+		if (path->dentry) {
+			*layer = &OVL_FS(dentry->d_sb)->layers[0];
 			return ovl_numlower(oe) ? 1 : -1;
+		}
 		idx++;
 	}
 	BUG_ON(idx > ovl_numlower(oe));
 	path->dentry = lowerstack[idx - 1].dentry;
-	path->mnt = lowerstack[idx - 1].layer->mnt;
+	*layer = lowerstack[idx - 1].layer;
+	path->mnt = (*layer)->mnt;
 
 	return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
 }
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 05c3dd597fa8..6359cf5c66ff 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -492,7 +492,9 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
 			      enum ovl_xattr ox);
 bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
 bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
+bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
+				     const struct ovl_layer *layer,
+				     const struct path *path);
 bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
 			 const struct path *upperpath);
 
@@ -674,7 +676,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 				struct dentry *origin, bool verify);
-int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
+int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
+		  const struct ovl_layer **layer);
 int ovl_verify_lowerdata(struct dentry *dentry);
 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 			  unsigned int flags);
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index d82d2a043da2..33fcd3d3af30 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -40,6 +40,8 @@ struct ovl_layer {
 	int idx;
 	/* One fsid per unique underlying sb (upper fsid == 0) */
 	int fsid;
+	/* xwhiteouts are enabled on this layer*/
+	bool xwhiteouts;
 };
 
 struct ovl_path {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index a490fc47c3e7..c2597075e3f8 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path *realpath,
 	if (IS_ERR(realfile))
 		return PTR_ERR(realfile);
 
-	rdd->in_xwhiteouts_dir = rdd->dentry &&
-		ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
 	rdd->first_maybe_whiteout = NULL;
 	rdd->ctx.pos = 0;
 	do {
@@ -359,10 +357,14 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
 		.is_lowest = false,
 	};
 	int idx, next;
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+	const struct ovl_layer *layer;
 
 	for (idx = 0; idx != -1; idx = next) {
-		next = ovl_path_next(idx, dentry, &realpath);
+		next = ovl_path_next(idx, dentry, &realpath, &layer);
 		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
+		if (ovl_path_check_xwhiteouts_xattr(ofs, layer, &realpath))
+			rdd.in_xwhiteouts_dir = true;
 
 		if (next != -1) {
 			err = ovl_dir_read(&realpath, &rdd);
@@ -568,6 +570,7 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
 	int err;
 	struct path realpath;
 	struct ovl_cache_entry *p, *n;
+	struct ovl_fs *ofs = OVL_FS(path->dentry->d_sb);
 	struct ovl_readdir_data rdd = {
 		.ctx.actor = ovl_fill_plain,
 		.list = list,
@@ -577,6 +580,8 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
 	INIT_LIST_HEAD(list);
 	*root = RB_ROOT;
 	ovl_path_upper(path->dentry, &realpath);
+	if (ovl_path_check_xwhiteouts_xattr(ofs, &ofs->layers[0], &realpath))
+		rdd.in_xwhiteouts_dir = true;
 
 	err = ovl_dir_read(&realpath, &rdd);
 	if (err)
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a0967bb25003..04588721eb2a 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1027,6 +1027,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 		struct ovl_fs_context_layer *l = &ctx->lower[i];
 		struct vfsmount *mnt;
 		struct inode *trap;
+		struct path root;
 		int fsid;
 
 		if (i < nr_merged_lower)
@@ -1069,6 +1070,16 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 		 */
 		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
 
+		/*
+		 * Check if xwhiteout (xattr whiteout) support is enabled on
+		 * this layer.
+		 */
+		root.mnt = mnt;
+		root.dentry = mnt->mnt_root;
+		err = ovl_path_getxattr(ofs, &root, OVL_XATTR_XWHITEOUTS, NULL, 0);
+		if (err >= 0)
+			layers[ofs->numlayer].xwhiteouts = true;
+
 		layers[ofs->numlayer].trap = trap;
 		layers[ofs->numlayer].mnt = mnt;
 		layers[ofs->numlayer].idx = ofs->numlayer;
@@ -1079,6 +1090,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
 		l->name = NULL;
 		ofs->numlayer++;
 		ofs->fs[fsid].is_lower = true;
+
+
 	}
 
 	/*
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index c3f020ca13a8..6c6e6f5893ea 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -739,11 +739,16 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
 	return res >= 0;
 }
 
-bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
+bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
+				     const struct ovl_layer *layer,
+				     const struct path *path)
 {
 	struct dentry *dentry = path->dentry;
 	int res;
 
+	if (!layer->xwhiteouts)
+		return false;
+
 	/* xattr.whiteouts must be a directory */
 	if (!d_is_dir(dentry))
 		return false;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 10:14 [PATCH v2] ovl: require xwhiteout feature flag on layer roots Miklos Szeredi
@ 2024-01-19 11:08 ` Amir Goldstein
  2024-01-19 11:20   ` Miklos Szeredi
                     ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Amir Goldstein @ 2024-01-19 11:08 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-unionfs, Alexander Larsson, linux-fsdevel, stable

On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi <mszeredi@redhat.com> wrote:
>
> Add a check on each lower layer for the xwhiteout feature.  This prevents
> unnecessary checking the overlay.whiteouts xattr when reading a directory
> if this feature is not enabled, i.e. most of the time.
>
> Share the same xattr for the per-directory and the per-layer flag, which
> has the effect that if this is enabled for a layer, then the optimization
> to bypass checking of individual entries does not work on the root of the
> layer.  This was deemed better, than having a separate xattr for the layer
> and the directory.
>
> Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout")
> Cc: <stable@vger.kernel.org> # v6.7
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
> v2:
>  - use overlay.whiteouts instead of overlay.feature_xwhiteout
>  - move initialization to ovl_get_layers()
>  - xwhiteouts can only be enabled on lower layer
>
>  fs/overlayfs/namei.c     | 10 +++++++---
>  fs/overlayfs/overlayfs.h |  7 +++++--
>  fs/overlayfs/ovl_entry.h |  2 ++
>  fs/overlayfs/readdir.c   | 11 ++++++++---
>  fs/overlayfs/super.c     | 13 +++++++++++++
>  fs/overlayfs/util.c      |  7 ++++++-
>  6 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
> index 03bc8d5dfa31..583cf56df66e 100644
> --- a/fs/overlayfs/namei.c
> +++ b/fs/overlayfs/namei.c
> @@ -863,7 +863,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
>   * Returns next layer in stack starting from top.
>   * Returns -1 if this is the last layer.
>   */
> -int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
> +int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
> +                 const struct ovl_layer **layer)
>  {
>         struct ovl_entry *oe = OVL_E(dentry);
>         struct ovl_path *lowerstack = ovl_lowerstack(oe);
> @@ -871,13 +872,16 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
>         BUG_ON(idx < 0);
>         if (idx == 0) {
>                 ovl_path_upper(dentry, path);
> -               if (path->dentry)
> +               if (path->dentry) {
> +                       *layer = &OVL_FS(dentry->d_sb)->layers[0];
>                         return ovl_numlower(oe) ? 1 : -1;
> +               }
>                 idx++;
>         }
>         BUG_ON(idx > ovl_numlower(oe));
>         path->dentry = lowerstack[idx - 1].dentry;
> -       path->mnt = lowerstack[idx - 1].layer->mnt;
> +       *layer = lowerstack[idx - 1].layer;
> +       path->mnt = (*layer)->mnt;
>
>         return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
>  }
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index 05c3dd597fa8..6359cf5c66ff 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -492,7 +492,9 @@ bool ovl_path_check_dir_xattr(struct ovl_fs *ofs, const struct path *path,
>                               enum ovl_xattr ox);
>  bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct path *path);
>  bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path);
> -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path);
> +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
> +                                    const struct ovl_layer *layer,
> +                                    const struct path *path);
>  bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs *ofs,
>                          const struct path *upperpath);
>
> @@ -674,7 +676,8 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
>  struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
>  struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
>                                 struct dentry *origin, bool verify);
> -int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
> +int ovl_path_next(int idx, struct dentry *dentry, struct path *path,
> +                 const struct ovl_layer **layer);
>  int ovl_verify_lowerdata(struct dentry *dentry);
>  struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
>                           unsigned int flags);
> diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> index d82d2a043da2..33fcd3d3af30 100644
> --- a/fs/overlayfs/ovl_entry.h
> +++ b/fs/overlayfs/ovl_entry.h
> @@ -40,6 +40,8 @@ struct ovl_layer {
>         int idx;
>         /* One fsid per unique underlying sb (upper fsid == 0) */
>         int fsid;
> +       /* xwhiteouts are enabled on this layer*/
> +       bool xwhiteouts;
>  };
>
>  struct ovl_path {
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index a490fc47c3e7..c2597075e3f8 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct path *realpath,
>         if (IS_ERR(realfile))
>                 return PTR_ERR(realfile);
>
> -       rdd->in_xwhiteouts_dir = rdd->dentry &&
> -               ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry->d_sb), realpath);
>         rdd->first_maybe_whiteout = NULL;
>         rdd->ctx.pos = 0;
>         do {
> @@ -359,10 +357,14 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
>                 .is_lowest = false,
>         };
>         int idx, next;
> +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> +       const struct ovl_layer *layer;
>
>         for (idx = 0; idx != -1; idx = next) {
> -               next = ovl_path_next(idx, dentry, &realpath);
> +               next = ovl_path_next(idx, dentry, &realpath, &layer);
>                 rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
> +               if (ovl_path_check_xwhiteouts_xattr(ofs, layer, &realpath))
> +                       rdd.in_xwhiteouts_dir = true;
>
>                 if (next != -1) {
>                         err = ovl_dir_read(&realpath, &rdd);
> @@ -568,6 +570,7 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
>         int err;
>         struct path realpath;
>         struct ovl_cache_entry *p, *n;
> +       struct ovl_fs *ofs = OVL_FS(path->dentry->d_sb);
>         struct ovl_readdir_data rdd = {
>                 .ctx.actor = ovl_fill_plain,
>                 .list = list,
> @@ -577,6 +580,8 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
>         INIT_LIST_HEAD(list);
>         *root = RB_ROOT;
>         ovl_path_upper(path->dentry, &realpath);
> +       if (ovl_path_check_xwhiteouts_xattr(ofs, &ofs->layers[0], &realpath))
> +               rdd.in_xwhiteouts_dir = true;

Not needed since we do not support xwhiteouts on upper.

>
>         err = ovl_dir_read(&realpath, &rdd);
>         if (err)
> diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> index a0967bb25003..04588721eb2a 100644
> --- a/fs/overlayfs/super.c
> +++ b/fs/overlayfs/super.c
> @@ -1027,6 +1027,7 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
>                 struct ovl_fs_context_layer *l = &ctx->lower[i];
>                 struct vfsmount *mnt;
>                 struct inode *trap;
> +               struct path root;
>                 int fsid;
>
>                 if (i < nr_merged_lower)
> @@ -1069,6 +1070,16 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
>                  */
>                 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
>
> +               /*
> +                * Check if xwhiteout (xattr whiteout) support is enabled on
> +                * this layer.
> +                */
> +               root.mnt = mnt;
> +               root.dentry = mnt->mnt_root;
> +               err = ovl_path_getxattr(ofs, &root, OVL_XATTR_XWHITEOUTS, NULL, 0);
> +               if (err >= 0)
> +                       layers[ofs->numlayer].xwhiteouts = true;
> +
>                 layers[ofs->numlayer].trap = trap;
>                 layers[ofs->numlayer].mnt = mnt;
>                 layers[ofs->numlayer].idx = ofs->numlayer;
> @@ -1079,6 +1090,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
>                 l->name = NULL;
>                 ofs->numlayer++;
>                 ofs->fs[fsid].is_lower = true;
> +
> +

extra spaces.

>         }
>
>         /*
> diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> index c3f020ca13a8..6c6e6f5893ea 100644
> --- a/fs/overlayfs/util.c
> +++ b/fs/overlayfs/util.c
> @@ -739,11 +739,16 @@ bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const struct path *path)
>         return res >= 0;
>  }
>
> -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const struct path *path)
> +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
> +                                    const struct ovl_layer *layer,
> +                                    const struct path *path)
>  {
>         struct dentry *dentry = path->dentry;
>         int res;
>
> +       if (!layer->xwhiteouts)
> +               return false;
> +
>         /* xattr.whiteouts must be a directory */
>         if (!d_is_dir(dentry))
>                 return false;
> --
> 2.43.0
>

Do you want me to fix/test and send this to Linus?

Alex, can we add your RVB to v2?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 11:08 ` Amir Goldstein
@ 2024-01-19 11:20   ` Miklos Szeredi
  2024-01-19 12:19     ` Amir Goldstein
  2024-01-19 12:29   ` Alexander Larsson
  2024-01-19 16:35   ` Alexander Larsson
  2 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2024-01-19 11:20 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Miklos Szeredi, linux-unionfs, Alexander Larsson, linux-fsdevel, stable

On Fri, 19 Jan 2024 at 12:08, Amir Goldstein <amir73il@gmail.com> wrote:

> > @@ -577,6 +580,8 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
> >         INIT_LIST_HEAD(list);
> >         *root = RB_ROOT;
> >         ovl_path_upper(path->dentry, &realpath);
> > +       if (ovl_path_check_xwhiteouts_xattr(ofs, &ofs->layers[0], &realpath))
> > +               rdd.in_xwhiteouts_dir = true;
>
> Not needed since we do not support xwhiteouts on upper.

Right.

> > @@ -1079,6 +1090,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
> >                 l->name = NULL;
> >                 ofs->numlayer++;
> >                 ofs->fs[fsid].is_lower = true;
> > +
> > +
>
> extra spaces.

Sorry, missing self review...

> Do you want me to fix/test and send this to Linus?

Yes please, if it's not a problem four you.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 11:20   ` Miklos Szeredi
@ 2024-01-19 12:19     ` Amir Goldstein
  0 siblings, 0 replies; 14+ messages in thread
From: Amir Goldstein @ 2024-01-19 12:19 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Miklos Szeredi, linux-unionfs, Alexander Larsson, linux-fsdevel, stable

On Fri, Jan 19, 2024 at 1:20 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, 19 Jan 2024 at 12:08, Amir Goldstein <amir73il@gmail.com> wrote:
>
> > > @@ -577,6 +580,8 @@ static int ovl_dir_read_impure(const struct path *path,  struct list_head *list,
> > >         INIT_LIST_HEAD(list);
> > >         *root = RB_ROOT;
> > >         ovl_path_upper(path->dentry, &realpath);
> > > +       if (ovl_path_check_xwhiteouts_xattr(ofs, &ofs->layers[0], &realpath))
> > > +               rdd.in_xwhiteouts_dir = true;
> >
> > Not needed since we do not support xwhiteouts on upper.
>
> Right.
>
> > > @@ -1079,6 +1090,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
> > >                 l->name = NULL;
> > >                 ofs->numlayer++;
> > >                 ofs->fs[fsid].is_lower = true;
> > > +
> > > +
> >
> > extra spaces.
>
> Sorry, missing self review...
>
> > Do you want me to fix/test and send this to Linus?
>
> Yes please, if it's not a problem four you.
>

ok. queued.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 11:08 ` Amir Goldstein
  2024-01-19 11:20   ` Miklos Szeredi
@ 2024-01-19 12:29   ` Alexander Larsson
  2024-01-19 14:12     ` Amir Goldstein
  2024-01-19 16:35   ` Alexander Larsson
  2 siblings, 1 reply; 14+ messages in thread
From: Alexander Larsson @ 2024-01-19 12:29 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi; +Cc: linux-unionfs, linux-fsdevel, stable

On Fri, 2024-01-19 at 13:08 +0200, Amir Goldstein wrote:
> On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi <mszeredi@redhat.com>
> wrote:
> > 
> > Add a check on each lower layer for the xwhiteout feature.  This
> > prevents
> > unnecessary checking the overlay.whiteouts xattr when reading a
> > directory
> > if this feature is not enabled, i.e. most of the time.
> > 
> > Share the same xattr for the per-directory and the per-layer flag,
> > which
> > has the effect that if this is enabled for a layer, then the
> > optimization
> > to bypass checking of individual entries does not work on the root
> > of the
> > layer.  This was deemed better, than having a separate xattr for
> > the layer
> > and the directory.
> > 
> > Fixes: bc8df7a3dc03 ("ovl: Add an alternative type of whiteout")
> > Cc: <stable@vger.kernel.org> # v6.7
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> > v2:
> >  - use overlay.whiteouts instead of overlay.feature_xwhiteout
> >  - move initialization to ovl_get_layers()
> >  - xwhiteouts can only be enabled on lower layer
> > 
> >  fs/overlayfs/namei.c     | 10 +++++++---
> >  fs/overlayfs/overlayfs.h |  7 +++++--
> >  fs/overlayfs/ovl_entry.h |  2 ++
> >  fs/overlayfs/readdir.c   | 11 ++++++++---
> >  fs/overlayfs/super.c     | 13 +++++++++++++
> >  fs/overlayfs/util.c      |  7 ++++++-
> >  6 files changed, 41 insertions(+), 9 deletions(-)
> > 
> > diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
> > index 03bc8d5dfa31..583cf56df66e 100644
> > --- a/fs/overlayfs/namei.c
> > +++ b/fs/overlayfs/namei.c
> > @@ -863,7 +863,8 @@ struct dentry *ovl_lookup_index(struct ovl_fs
> > *ofs, struct dentry *upper,
> >   * Returns next layer in stack starting from top.
> >   * Returns -1 if this is the last layer.
> >   */
> > -int ovl_path_next(int idx, struct dentry *dentry, struct path
> > *path)
> > +int ovl_path_next(int idx, struct dentry *dentry, struct path
> > *path,
> > +                 const struct ovl_layer **layer)
> >  {
> >         struct ovl_entry *oe = OVL_E(dentry);
> >         struct ovl_path *lowerstack = ovl_lowerstack(oe);
> > @@ -871,13 +872,16 @@ int ovl_path_next(int idx, struct dentry
> > *dentry, struct path *path)
> >         BUG_ON(idx < 0);
> >         if (idx == 0) {
> >                 ovl_path_upper(dentry, path);
> > -               if (path->dentry)
> > +               if (path->dentry) {
> > +                       *layer = &OVL_FS(dentry->d_sb)->layers[0];
> >                         return ovl_numlower(oe) ? 1 : -1;
> > +               }
> >                 idx++;
> >         }
> >         BUG_ON(idx > ovl_numlower(oe));
> >         path->dentry = lowerstack[idx - 1].dentry;
> > -       path->mnt = lowerstack[idx - 1].layer->mnt;
> > +       *layer = lowerstack[idx - 1].layer;
> > +       path->mnt = (*layer)->mnt;
> > 
> >         return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
> >  }
> > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> > index 05c3dd597fa8..6359cf5c66ff 100644
> > --- a/fs/overlayfs/overlayfs.h
> > +++ b/fs/overlayfs/overlayfs.h
> > @@ -492,7 +492,9 @@ bool ovl_path_check_dir_xattr(struct ovl_fs
> > *ofs, const struct path *path,
> >                               enum ovl_xattr ox);
> >  bool ovl_path_check_origin_xattr(struct ovl_fs *ofs, const struct
> > path *path);
> >  bool ovl_path_check_xwhiteout_xattr(struct ovl_fs *ofs, const
> > struct path *path);
> > -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const
> > struct path *path);
> > +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
> > +                                    const struct ovl_layer *layer,
> > +                                    const struct path *path);
> >  bool ovl_init_uuid_xattr(struct super_block *sb, struct ovl_fs
> > *ofs,
> >                          const struct path *upperpath);
> > 
> > @@ -674,7 +676,8 @@ int ovl_get_index_name(struct ovl_fs *ofs,
> > struct dentry *origin,
> >  struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh
> > *fh);
> >  struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry
> > *upper,
> >                                 struct dentry *origin, bool
> > verify);
> > -int ovl_path_next(int idx, struct dentry *dentry, struct path
> > *path);
> > +int ovl_path_next(int idx, struct dentry *dentry, struct path
> > *path,
> > +                 const struct ovl_layer **layer);
> >  int ovl_verify_lowerdata(struct dentry *dentry);
> >  struct dentry *ovl_lookup(struct inode *dir, struct dentry
> > *dentry,
> >                           unsigned int flags);
> > diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
> > index d82d2a043da2..33fcd3d3af30 100644
> > --- a/fs/overlayfs/ovl_entry.h
> > +++ b/fs/overlayfs/ovl_entry.h
> > @@ -40,6 +40,8 @@ struct ovl_layer {
> >         int idx;
> >         /* One fsid per unique underlying sb (upper fsid == 0) */
> >         int fsid;
> > +       /* xwhiteouts are enabled on this layer*/
> > +       bool xwhiteouts;
> >  };
> > 
> >  struct ovl_path {
> > diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> > index a490fc47c3e7..c2597075e3f8 100644
> > --- a/fs/overlayfs/readdir.c
> > +++ b/fs/overlayfs/readdir.c
> > @@ -305,8 +305,6 @@ static inline int ovl_dir_read(const struct
> > path *realpath,
> >         if (IS_ERR(realfile))
> >                 return PTR_ERR(realfile);
> > 
> > -       rdd->in_xwhiteouts_dir = rdd->dentry &&
> > -               ovl_path_check_xwhiteouts_xattr(OVL_FS(rdd->dentry-
> > >d_sb), realpath);
> >         rdd->first_maybe_whiteout = NULL;
> >         rdd->ctx.pos = 0;
> >         do {
> > @@ -359,10 +357,14 @@ static int ovl_dir_read_merged(struct dentry
> > *dentry, struct list_head *list,
> >                 .is_lowest = false,
> >         };
> >         int idx, next;
> > +       struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> > +       const struct ovl_layer *layer;
> > 
> >         for (idx = 0; idx != -1; idx = next) {
> > -               next = ovl_path_next(idx, dentry, &realpath);
> > +               next = ovl_path_next(idx, dentry, &realpath,
> > &layer);
> >                 rdd.is_upper = ovl_dentry_upper(dentry) ==
> > realpath.dentry;
> > +               if (ovl_path_check_xwhiteouts_xattr(ofs, layer,
> > &realpath))
> > +                       rdd.in_xwhiteouts_dir = true;
> > 
> >                 if (next != -1) {
> >                         err = ovl_dir_read(&realpath, &rdd);
> > @@ -568,6 +570,7 @@ static int ovl_dir_read_impure(const struct
> > path *path,  struct list_head *list,
> >         int err;
> >         struct path realpath;
> >         struct ovl_cache_entry *p, *n;
> > +       struct ovl_fs *ofs = OVL_FS(path->dentry->d_sb);
> >         struct ovl_readdir_data rdd = {
> >                 .ctx.actor = ovl_fill_plain,
> >                 .list = list,
> > @@ -577,6 +580,8 @@ static int ovl_dir_read_impure(const struct
> > path *path,  struct list_head *list,
> >         INIT_LIST_HEAD(list);
> >         *root = RB_ROOT;
> >         ovl_path_upper(path->dentry, &realpath);
> > +       if (ovl_path_check_xwhiteouts_xattr(ofs, &ofs->layers[0],
> > &realpath))
> > +               rdd.in_xwhiteouts_dir = true;
> 
> Not needed since we do not support xwhiteouts on upper.
> 
> > 
> >         err = ovl_dir_read(&realpath, &rdd);
> >         if (err)
> > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
> > index a0967bb25003..04588721eb2a 100644
> > --- a/fs/overlayfs/super.c
> > +++ b/fs/overlayfs/super.c
> > @@ -1027,6 +1027,7 @@ static int ovl_get_layers(struct super_block
> > *sb, struct ovl_fs *ofs,
> >                 struct ovl_fs_context_layer *l = &ctx->lower[i];
> >                 struct vfsmount *mnt;
> >                 struct inode *trap;
> > +               struct path root;
> >                 int fsid;
> > 
> >                 if (i < nr_merged_lower)
> > @@ -1069,6 +1070,16 @@ static int ovl_get_layers(struct super_block
> > *sb, struct ovl_fs *ofs,
> >                  */
> >                 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
> > 
> > +               /*
> > +                * Check if xwhiteout (xattr whiteout) support is
> > enabled on
> > +                * this layer.
> > +                */
> > +               root.mnt = mnt;
> > +               root.dentry = mnt->mnt_root;
> > +               err = ovl_path_getxattr(ofs, &root,
> > OVL_XATTR_XWHITEOUTS, NULL, 0);
> > +               if (err >= 0)
> > +                       layers[ofs->numlayer].xwhiteouts = true;
> > +
> >                 layers[ofs->numlayer].trap = trap;
> >                 layers[ofs->numlayer].mnt = mnt;
> >                 layers[ofs->numlayer].idx = ofs->numlayer;
> > @@ -1079,6 +1090,8 @@ static int ovl_get_layers(struct super_block
> > *sb, struct ovl_fs *ofs,
> >                 l->name = NULL;
> >                 ofs->numlayer++;
> >                 ofs->fs[fsid].is_lower = true;
> > +
> > +
> 
> extra spaces.
> 
> >         }
> > 
> >         /*
> > diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
> > index c3f020ca13a8..6c6e6f5893ea 100644
> > --- a/fs/overlayfs/util.c
> > +++ b/fs/overlayfs/util.c
> > @@ -739,11 +739,16 @@ bool ovl_path_check_xwhiteout_xattr(struct
> > ovl_fs *ofs, const struct path *path)
> >         return res >= 0;
> >  }
> > 
> > -bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs, const
> > struct path *path)
> > +bool ovl_path_check_xwhiteouts_xattr(struct ovl_fs *ofs,
> > +                                    const struct ovl_layer *layer,
> > +                                    const struct path *path)
> >  {
> >         struct dentry *dentry = path->dentry;
> >         int res;
> > 
> > +       if (!layer->xwhiteouts)
> > +               return false;
> > +
> >         /* xattr.whiteouts must be a directory */
> >         if (!d_is_dir(dentry))
> >                 return false;
> > --
> > 2.43.0
> > 
> 
> Do you want me to fix/test and send this to Linus?
> 
> Alex, can we add your RVB to v2?
> 

Yeah, other that your comments this looks good to me (and I tested it
here too).

Reviewed-By: Alexander Larsson <alex@redhat.com>

I'll have a look at doing the required changes in composefs.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
=-=-=
 Alexander Larsson                                            Red Hat,
Inc 
       alexl@redhat.com            alexander.larsson@gmail.com 
He's a Nobel prize-winning voodoo matador haunted by memories of 'Nam. 
She's an orphaned gypsy bodyguard on the trail of a serial killer. They
fight crime! 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 12:29   ` Alexander Larsson
@ 2024-01-19 14:12     ` Amir Goldstein
  0 siblings, 0 replies; 14+ messages in thread
From: Amir Goldstein @ 2024-01-19 14:12 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

> > Alex, can we add your RVB to v2?
> >
>
> Yeah, other that your comments this looks good to me (and I tested it
> here too).
>
> Reviewed-By: Alexander Larsson <alex@redhat.com>
>

Added.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 11:08 ` Amir Goldstein
  2024-01-19 11:20   ` Miklos Szeredi
  2024-01-19 12:29   ` Alexander Larsson
@ 2024-01-19 16:35   ` Alexander Larsson
  2024-01-19 19:06     ` Amir Goldstein
  2024-01-22  8:38     ` Amir Goldstein
  2 siblings, 2 replies; 14+ messages in thread
From: Alexander Larsson @ 2024-01-19 16:35 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi; +Cc: linux-unionfs, linux-fsdevel, stable

On Fri, 2024-01-19 at 13:08 +0200, Amir Goldstein wrote:
> On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi <mszeredi@redhat.com>
> wrote:
> 
> 
> Do you want me to fix/test and send this to Linus?
> 
> Alex, can we add your RVB to v2?

I ran into an issue converting composefs to use this.

Suppose we have a chroot of files containing some upper dirs and we
want to make a composefs of this. For example, say
/foo/lower/dir/whiteout is a traditional whiteout.

Previously, what happened is that I marked the whiteout file with
trusted.overlay.overlay.whiteout, and the /foo/lower/dir with
trusted.overlay.overlay.whiteouts.

Them when I mounted then entire chroot with overlayfs these xattrs
would get unescaped and I would get a $mnt/foo/lower/dir/whiteout with
a trusted.overlay.whiteout xattr, and a $mnt/foo/lower/dir with a
trusted.overlay.whiteout. When I then mounted another overlayfs with a
lowerdir of $mnt/foo/lower it would treat the whiteout as a xwhiteout.

However, now I need the lowerdir toplevel dir to also have a 
trusted.overlay.whiteouts xattr. But when I'm converting the entire
chroot I do not know which of the directories is going to be used as
the toplevel lower dir, so I don't know where to put this marker.

The only solution I see is to put it on *all* parent directories. Is
there a better approach here?

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
=-=-=
 Alexander Larsson                                            Red Hat,
Inc 
       alexl@redhat.com            alexander.larsson@gmail.com 
He's a scrappy day-dreaming inventor looking for a cure to the poison 
coursing through his veins. She's a foxy snooty stripper with someone 
else's memories. They fight crime! 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 16:35   ` Alexander Larsson
@ 2024-01-19 19:06     ` Amir Goldstein
  2024-01-19 20:29       ` Miklos Szeredi
  2024-01-22  8:38     ` Amir Goldstein
  1 sibling, 1 reply; 14+ messages in thread
From: Amir Goldstein @ 2024-01-19 19:06 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

On Fri, Jan 19, 2024 at 6:35 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Fri, 2024-01-19 at 13:08 +0200, Amir Goldstein wrote:
> > On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi <mszeredi@redhat.com>
> > wrote:
> >
> >
> > Do you want me to fix/test and send this to Linus?
> >
> > Alex, can we add your RVB to v2?
>
> I ran into an issue converting composefs to use this.
>
> Suppose we have a chroot of files containing some upper dirs and we
> want to make a composefs of this. For example, say
> /foo/lower/dir/whiteout is a traditional whiteout.
>
> Previously, what happened is that I marked the whiteout file with
> trusted.overlay.overlay.whiteout, and the /foo/lower/dir with
> trusted.overlay.overlay.whiteouts.
>
> Them when I mounted then entire chroot with overlayfs these xattrs
> would get unescaped and I would get a $mnt/foo/lower/dir/whiteout with
> a trusted.overlay.whiteout xattr, and a $mnt/foo/lower/dir with a
> trusted.overlay.whiteout. When I then mounted another overlayfs with a
> lowerdir of $mnt/foo/lower it would treat the whiteout as a xwhiteout.
>
> However, now I need the lowerdir toplevel dir to also have a
> trusted.overlay.whiteouts xattr. But when I'm converting the entire
> chroot I do not know which of the directories is going to be used as
> the toplevel lower dir, so I don't know where to put this marker.
>
> The only solution I see is to put it on *all* parent directories. Is
> there a better approach here?

How about checking xwhiteouts xattrs along with impure and
origin xattrs in ovl_get_inode()?

Then there will be no overhead in readdir and no need for
marking the layer root?

Miklos, would that be acceptable?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 19:06     ` Amir Goldstein
@ 2024-01-19 20:29       ` Miklos Szeredi
  2024-01-20 10:32         ` Amir Goldstein
  0 siblings, 1 reply; 14+ messages in thread
From: Miklos Szeredi @ 2024-01-19 20:29 UTC (permalink / raw)
  To: Amir Goldstein
  Cc: Alexander Larsson, Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

On Fri, 19 Jan 2024 at 20:06, Amir Goldstein <amir73il@gmail.com> wrote:

> How about checking xwhiteouts xattrs along with impure and
> origin xattrs in ovl_get_inode()?
>
> Then there will be no overhead in readdir and no need for
> marking the layer root?
>
> Miklos, would that be acceptable?

It's certainly a good idea, but doesn't really address my worry.  The
minor performance impact is not what bothers me most.  It's the fact
that in the common case the result of these calls are discarded.
That's just plain ugly, IMO.

My preferred alternative would be a mount option.  Amir, Alex, would
you both be okay with that?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 20:29       ` Miklos Szeredi
@ 2024-01-20 10:32         ` Amir Goldstein
  2024-01-22 10:01           ` Alexander Larsson
  0 siblings, 1 reply; 14+ messages in thread
From: Amir Goldstein @ 2024-01-20 10:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Alexander Larsson, Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

[-- Attachment #1: Type: text/plain, Size: 3082 bytes --]

On Fri, Jan 19, 2024 at 10:30 PM Miklos Szeredi <miklos@szeredi.hu> wrote:
>
> On Fri, 19 Jan 2024 at 20:06, Amir Goldstein <amir73il@gmail.com> wrote:
>
> > How about checking xwhiteouts xattrs along with impure and
> > origin xattrs in ovl_get_inode()?
> >

That was not a very good suggestion on my part.
ovl_get_inode() only checks impure xattr on upper dir and origin xattr
on non-merge non-multi lower dir.

If we change the location of xwhiteout xattr check, it should be in
ovl_lookup_single() next to checking opaque xattr, which makes
me think - hey, why don't we overload opaque xattr, just like we
did with metacopy xattr?

An overlay.opaque xattr with empty string means "may have xwhiteouts"
and that is backward compatible, because ovl_is_opaquedir() checks
for xattr of length 1.

The only extra getxattr() needed would be for the d->last case...

> > Then there will be no overhead in readdir and no need for
> > marking the layer root?
> >
> > Miklos, would that be acceptable?
>
> It's certainly a good idea, but doesn't really address my worry.  The
> minor performance impact is not what bothers me most.  It's the fact
> that in the common case the result of these calls are discarded.
> That's just plain ugly, IMO.

...so the question boils down to, whether you find it too ugly
to always getxattr(opaque) on lookup of the last lower layer and
whether you find the overloading of opaque xattr too hacky?

As a precedent, we *always* check metacopy xattr in last lower layer
to check for error conditions, even if user did not opt-in to metacopy
at all, while we could just as easily have ignored it.

>
> My preferred alternative would be a mount option.  Amir, Alex, would
> you both be okay with that?
>

I think I had suggested that escaped private xattrs would also require
an opt-in mount option, but Alex explained that the users mounting the
overlay are not always aware of the fact that the layers were composed
this way, but I admit that I do not remember all the exact details.

Alex, do I remember correctly that the overlay instance where xwhiteouts
needs to be checked does NOT necessarily have a lowerdata layers?
The composefs instance with lowerdata layers is the one exposing the
(escaped) xwhiteout entries as xwhiteouts. Is that correct?

Is there even a use case for xwhiteouts NOT inside another lower
overlayfs?

If we limit the check for xwhiteouts only to nested overlayfs, then maybe
Miklos will care less about an extra getxattr on lookup?

Attached patch implements both xwhiteout and opaque checks during
lookup - we can later choose only one of them to keep.

Note that is changes the optimization to per-dentry, not per-layer,
so in the common case (no layers have xwhiteouts) xwhiteouts will not
be checked, but if xwhiteouts exist in any lower layer in the stack, then
xwhiteouts will be checked in all the layers of the merged dir (*).

(*) still need to optimize away lookup of xwhiteouts in upperdir.

Let me know what you think.

Thanks,
Amir.

[-- Attachment #2: v3-0001-fsnotify-optimize-the-case-of-no-content-event-wa.patch --]
[-- Type: text/x-patch, Size: 8777 bytes --]

From 7fa21951647ae24296cd0602f1291d00a8fa84d0 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 9 Dec 2022 11:50:26 +0200
Subject: [PATCH v3] fsnotify: optimize the case of no content event watchers

Commit e43de7f0862b ("fsnotify: optimize the case of no marks of any type")
optimized the case where there are no fsnotify watchers on any of the
filesystem's objects.

It is quite common for a system to have a single local filesystem and
it is quite common for the system to have some inotify watches on some
config files or directories, so the optimization of no marks at all is
often not in effect.

Content event (i.e. access,modify) watchers on sb/mount more rare, so
optimizing the case of no sb/mount marks with content events can improve
performance for more systems, especially for performance sensitive io
workloads.

Unless a parent inode is watching, check for content events in masks of
sb/mount/inode masks early to optimize out the code in __fsnotify_parent()
and fsnotify() for fsnotify access/modify hooks.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fsnotify.c             | 26 ++++++++++++++++++++++++++
 fs/notify/mark.c                 | 26 +++++++++++++++++++++++++-
 include/linux/fs.h               |  1 +
 include/linux/fsnotify.h         | 21 ++++++++++++---------
 include/linux/fsnotify_backend.h | 13 +++++++++++++
 5 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7974e91ffe13..b96251cdd165 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -237,6 +237,32 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+/* Notify sb/mount/inode/parent watchers of this path */
+int fsnotify_path(const struct path *path, __u32 mask)
+{
+	struct dentry *dentry = path->dentry;
+
+	if (!fsnotify_sb_has_watchers(dentry->d_sb))
+		return 0;
+
+	/* Optimize the likely case of sb/mount/inode not watching content */
+	if (mask & FSNOTIFY_CONTENT_EVENTS &&
+	    likely(!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))) {
+		__u32 marks_mask = d_inode(dentry)->i_fsnotify_mask |
+				   real_mount(path->mnt)->mnt_fsnotify_mask |
+				   dentry->d_sb->s_fsnotify_mask;
+
+		if (!(mask & marks_mask))
+			return 0;
+	}
+
+	/*
+	 * fsnotify_parent() checks the event masks of sb/mount/inode/parent.
+	 */
+	return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
+}
+EXPORT_SYMBOL_GPL(fsnotify_path);
+
 static int fsnotify_handle_inode_event(struct fsnotify_group *group,
 				       struct fsnotify_mark *inode_mark,
 				       u32 mask, const void *data, int data_type,
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d6944ff86ffa..d0e208913881 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -153,9 +153,30 @@ static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
 	return inode;
 }
 
+/*
+ * To avoid the performance penalty of rare case of sb/mount content event
+ * watchers in the hot io path, taint sb if such watchers are added.
+ */
+static void fsnotify_update_sb_watchers(struct fsnotify_mark_connector *conn,
+					u32 old_mask, u32 new_mask)
+{
+	struct super_block *sb = fsnotify_connector_sb(conn);
+	u32 new_watchers = new_mask & ~old_mask & FSNOTIFY_CONTENT_EVENTS;
+
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE || !sb || !new_watchers)
+		return;
+
+	/*
+	 * TODO: We need to take sb conn->lock to set FS_MNT_CONTENT_WATCHED
+	 * in sb->s_fsnotify_mask, but if this is a recalc of mount mark mask,
+	 * it is not sure that we have an sb connector attached yet.
+	 */
+	sb->s_iflags |= SB_I_CONTENT_WATCHED;
+}
+
 static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
-	u32 new_mask = 0;
+	u32 old_mask, new_mask = 0;
 	bool want_iref = false;
 	struct fsnotify_mark *mark;
 
@@ -163,6 +184,7 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 	/* We can get detached connector here when inode is getting unlinked. */
 	if (!fsnotify_valid_obj_type(conn->type))
 		return NULL;
+	old_mask = fsnotify_conn_mask(conn);
 	hlist_for_each_entry(mark, &conn->list, obj_list) {
 		if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
 			continue;
@@ -173,6 +195,8 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 	}
 	*fsnotify_conn_mask_p(conn) = new_mask;
 
+	fsnotify_update_sb_watchers(conn, old_mask, new_mask);
+
 	return fsnotify_update_iref(conn, want_iref);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e6ba0cc6f2ee..dac36fe139e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1173,6 +1173,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
 #define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
 #define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */
+#define SB_I_CONTENT_WATCHED 0x00002000 /* fsnotify file content monitor */
 
 /* Possible states of 'frozen' field */
 enum {
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 11e6434b8e71..2e0f47648a8b 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,6 +17,12 @@
 #include <linux/slab.h>
 #include <linux/bug.h>
 
+/* Are there any inode/mount/sb objects that are being watched? */
+static inline bool fsnotify_sb_has_watchers(struct super_block *sb)
+{
+	return atomic_long_read(&sb->s_fsnotify_connectors);
+}
+
 /*
  * Notify this @dir inode about a change in a child directory entry.
  * The directory entry may have turned positive or negative or its inode may
@@ -30,7 +36,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
 				struct inode *dir, const struct qstr *name,
 				u32 cookie)
 {
-	if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
+	if (!fsnotify_sb_has_watchers(dir->i_sb))
 		return 0;
 
 	return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
@@ -44,7 +50,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
 
 static inline void fsnotify_inode(struct inode *inode, __u32 mask)
 {
-	if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
+	if (!fsnotify_sb_has_watchers(inode->i_sb))
 		return;
 
 	if (S_ISDIR(inode->i_mode))
@@ -59,9 +65,6 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
 {
 	struct inode *inode = d_inode(dentry);
 
-	if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0)
-		return 0;
-
 	if (S_ISDIR(inode->i_mode)) {
 		mask |= FS_ISDIR;
 
@@ -86,18 +89,18 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
  */
 static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
 {
+	if (!fsnotify_sb_has_watchers(dentry->d_sb))
+		return;
+
 	fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
 }
 
 static inline int fsnotify_file(struct file *file, __u32 mask)
 {
-	const struct path *path;
-
 	if (file->f_mode & FMODE_NONOTIFY)
 		return 0;
 
-	path = &file->f_path;
-	return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
+	return fsnotify_path(&file->f_path, mask);
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7f63be5ca0f1..4e098bdeb3ca 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -66,6 +66,11 @@
 #define FS_RENAME		0x10000000	/* File was renamed */
 #define FS_DN_MULTISHOT		0x20000000	/* dnotify multishot */
 #define FS_ISDIR		0x40000000	/* event occurred against dir */
+/*
+ * This flag is set in the object interest mask of sb to indicate that
+ * some mount mark is interested to get content events.
+ */
+#define FS_MNT_CONTENT_WATCHED	0x80000000
 
 #define FS_MOVE			(FS_MOVED_FROM | FS_MOVED_TO)
 
@@ -77,6 +82,13 @@
  */
 #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
 
+/* Content events can be used to inspect file content */
+#define FSNOTIFY_CONTENT_PERM_EVENTS	(FS_ACCESS_PERM)
+
+/* Content events can be used to monitor file content */
+#define FSNOTIFY_CONTENT_EVENTS		(FS_ACCESS | FS_MODIFY | \
+					 FSNOTIFY_CONTENT_PERM_EVENTS)
+
 #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
 				  FS_OPEN_EXEC_PERM)
 
@@ -541,6 +553,7 @@ struct fsnotify_mark {
 extern int fsnotify(__u32 mask, const void *data, int data_type,
 		    struct inode *dir, const struct qstr *name,
 		    struct inode *inode, u32 cookie);
+extern int fsnotify_path(const struct path *path, __u32 mask);
 extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
 			   int data_type);
 extern void __fsnotify_inode_delete(struct inode *inode);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-19 16:35   ` Alexander Larsson
  2024-01-19 19:06     ` Amir Goldstein
@ 2024-01-22  8:38     ` Amir Goldstein
  2024-01-22 10:44       ` Alexander Larsson
  1 sibling, 1 reply; 14+ messages in thread
From: Amir Goldstein @ 2024-01-22  8:38 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

On Fri, Jan 19, 2024 at 6:35 PM Alexander Larsson <alexl@redhat.com> wrote:
>
> On Fri, 2024-01-19 at 13:08 +0200, Amir Goldstein wrote:
> > On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi <mszeredi@redhat.com>
> > wrote:
> >
> >
> > Do you want me to fix/test and send this to Linus?
> >
> > Alex, can we add your RVB to v2?
>
> I ran into an issue converting composefs to use this.
>
> Suppose we have a chroot of files containing some upper dirs and we
> want to make a composefs of this. For example, say
> /foo/lower/dir/whiteout is a traditional whiteout.
>
> Previously, what happened is that I marked the whiteout file with
> trusted.overlay.overlay.whiteout, and the /foo/lower/dir with
> trusted.overlay.overlay.whiteouts.
>
> Them when I mounted then entire chroot with overlayfs these xattrs
> would get unescaped and I would get a $mnt/foo/lower/dir/whiteout with
> a trusted.overlay.whiteout xattr, and a $mnt/foo/lower/dir with a
> trusted.overlay.whiteout. When I then mounted another overlayfs with a
> lowerdir of $mnt/foo/lower it would treat the whiteout as a xwhiteout.
>
> However, now I need the lowerdir toplevel dir to also have a
> trusted.overlay.whiteouts xattr. But when I'm converting the entire
> chroot I do not know which of the directories is going to be used as
> the toplevel lower dir, so I don't know where to put this marker.
>
> The only solution I see is to put it on *all* parent directories. Is
> there a better approach here?
>

Alex,

As you can see, I posted v3 with an alternative approach that would not
require marking all possible lower layer roots.

However, I cannot help wondering if it wouldn't be better practice, when
composing layers, to always be explicit, per-directory about whether the
composed directory is a "base" or a "diff" layer.

Isn't this information always known at composing time?

In legacy overlayfs, there is an explicit mark for "this is a base dir" -
namely, the opaque xattr, but there is no such explicit mark on
directories without an entry with the same name in layers below them.

The lack of explicit mark "merge" vs. "opaque" in all directories in all
the layers had led to problems in the past, for example, this is the
reason that this fix was needed:

  b79e05aaa166 ovl: no direct iteration for dir with origin xattr

In conclusion, since composefs is the first tool, that I know of, to
compose "non-legacy" overlayfs layers (i.e. with overlay xattrs),
I think the correct design decision would mark every directory in
every layer explicitly as at exactly one of "merge"/"opaque".

Note that non-dir are always marked explicitly as "metacopy",
so there is no ambiguity with non-dirs and we also error out
if a non-dir stack does not end with an "opaque" entry.

Additionally, when composing layers, since all the children of
a directory should be explicitly marked as "merge" vs. "opaque"
then the parent's "impure" (meaning contains "merge" children)
can also be set at composing time.

Failing to set "impure" correctly when composing layers could
result in wrong readdir d_ino results.

My proposition in v3 for an explicit mark was to
"reinterpret the opaque xattr from boolean to enum".
My proposal included only the states 'y' (opaque) and 'x' (contains
xwhiteouts), but for composefs, I would extend this to also mark
a merge dir explicitly with opaque='n' and explicitly mark all the
directories in a "base layer" with opaque='y'.

Implementation-wise, composefs could start by marking each directory
with either 'y'/'n' state based on the lowerstack, and if xwhiteout entries
are added, 'n' state could be changed to 'x' state.

What do you think?

Does it make sense from composefs POV?
Am I correct to assume that at composing time, every directory
state is known (base 'y' vs. diff 'n')?

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-20 10:32         ` Amir Goldstein
@ 2024-01-22 10:01           ` Alexander Larsson
  0 siblings, 0 replies; 14+ messages in thread
From: Alexander Larsson @ 2024-01-22 10:01 UTC (permalink / raw)
  To: Amir Goldstein, Miklos Szeredi
  Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

On Sat, 2024-01-20 at 12:32 +0200, Amir Goldstein wrote:
> On Fri, Jan 19, 2024 at 10:30 PM Miklos Szeredi <miklos@szeredi.hu>
> wrote:
> > 
> > On Fri, 19 Jan 2024 at 20:06, Amir Goldstein <amir73il@gmail.com>
> > wrote:
> > 
> > > How about checking xwhiteouts xattrs along with impure and
> > > origin xattrs in ovl_get_inode()?
> > > 
> 
> That was not a very good suggestion on my part.
> ovl_get_inode() only checks impure xattr on upper dir and origin
> xattr
> on non-merge non-multi lower dir.
> 
> If we change the location of xwhiteout xattr check, it should be in
> ovl_lookup_single() next to checking opaque xattr, which makes
> me think - hey, why don't we overload opaque xattr, just like we
> did with metacopy xattr?
> 
> An overlay.opaque xattr with empty string means "may have xwhiteouts"
> and that is backward compatible, because ovl_is_opaquedir() checks
> for xattr of length 1.
> 
> The only extra getxattr() needed would be for the d->last case...
> 
> > > Then there will be no overhead in readdir and no need for
> > > marking the layer root?
> > > 
> > > Miklos, would that be acceptable?
> > 
> > It's certainly a good idea, but doesn't really address my worry. 
> > The
> > minor performance impact is not what bothers me most.  It's the
> > fact
> > that in the common case the result of these calls are discarded.
> > That's just plain ugly, IMO.
> 
> ...so the question boils down to, whether you find it too ugly
> to always getxattr(opaque) on lookup of the last lower layer and
> whether you find the overloading of opaque xattr too hacky?
> 
> As a precedent, we *always* check metacopy xattr in last lower layer
> to check for error conditions, even if user did not opt-in to
> metacopy
> at all, while we could just as easily have ignored it.
> 
> > 
> > My preferred alternative would be a mount option.  Amir, Alex,
> > would
> > you both be okay with that?
> > 
> 
> I think I had suggested that escaped private xattrs would also
> require
> an opt-in mount option, but Alex explained that the users mounting
> the
> overlay are not always aware of the fact that the layers were
> composed
> this way, but I admit that I do not remember all the exact details.
> 
> Alex, do I remember correctly that the overlay instance where
> xwhiteouts
> needs to be checked does NOT necessarily have a lowerdata layers?
> The composefs instance with lowerdata layers is the one exposing the
> (escaped) xwhiteout entries as xwhiteouts. Is that correct?
> 
> Is there even a use case for xwhiteouts NOT inside another lower
> overlayfs?

No. Strictly speaking regular whiteouts are always preferable to
xwhiteouts (as they work for both user and system ovl mounts and are
supported by older kernels and existing software, etc). The only place
where xwhiteouts are useful is when we need to escape them to put
inside an overlayfs mount.

The best way to think about the composefs usecase is: 

  Suppose you had a pre-existing system image that someone installed a
multi layer container image in. This means that somewhere inside this
image is a set of lowerdirs, and one of them may have a traditional
whiteout. Now we want to create an overlayfs mount that when used,
works just like the above image would work, including when you mount
the sub-overlayfs mounts from the container image lowerdirs. 


Fallout of this:

The composefs overlay lowerdir will need to contain escaped xwhiteouts,
so that the mount will have unescaped xwhiteouts. These escaped
whiteouts can be anywhere, even in a "single layer" overlayfs. But the
unescaped xwhiteouts will never be in a lowermost lowerdir.

In the composefs case we will be using lowerdata for the outer
overlayfs, but the actual unescaped xwhiteouts in the container image
lowerdirs don't need to have a lowerdata involved at all.

The container mount will be done by pre-existing software (say docker)
that isn't aware that we converted the regular whiteouts to xwhiteouts,
so having to use a mount option is not ideal (would require docker
changes).

> If we limit the check for xwhiteouts only to nested overlayfs, then
> maybe
> Miklos will care less about an extra getxattr on lookup?
> 
> Attached patch implements both xwhiteout and opaque checks during
> lookup - we can later choose only one of them to keep.

Seems like you attached the wrong patch, but I will comment on the
other patch you sent to the list.

> Note that is changes the optimization to per-dentry, not per-layer,
> so in the common case (no layers have xwhiteouts) xwhiteouts will not
> be checked, but if xwhiteouts exist in any lower layer in the stack,
> then
> xwhiteouts will be checked in all the layers of the merged dir (*).
> 
> (*) still need to optimize away lookup of xwhiteouts in upperdir.
> 
> Let me know what you think.
> 
> Thanks,
> Amir.

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
=-=-=
 Alexander Larsson                                            Red Hat,
Inc 
       alexl@redhat.com            alexander.larsson@gmail.com 
He's a benighted bohemian waffle chef on the wrong side of the law.
She's 
an enchanted junkie safe cracker from the wrong side of the tracks.
They 
fight crime! 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-22  8:38     ` Amir Goldstein
@ 2024-01-22 10:44       ` Alexander Larsson
  2024-01-22 11:30         ` Amir Goldstein
  0 siblings, 1 reply; 14+ messages in thread
From: Alexander Larsson @ 2024-01-22 10:44 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

On Mon, 2024-01-22 at 10:38 +0200, Amir Goldstein wrote:
> On Fri, Jan 19, 2024 at 6:35 PM Alexander Larsson <alexl@redhat.com>
> wrote:
> > 
> > On Fri, 2024-01-19 at 13:08 +0200, Amir Goldstein wrote:
> > > On Fri, Jan 19, 2024 at 12:14 PM Miklos Szeredi
> > > <mszeredi@redhat.com>
> > > wrote:
> > > 
> > > 
> > > Do you want me to fix/test and send this to Linus?
> > > 
> > > Alex, can we add your RVB to v2?
> > 
> > I ran into an issue converting composefs to use this.
> > 
> > Suppose we have a chroot of files containing some upper dirs and we
> > want to make a composefs of this. For example, say
> > /foo/lower/dir/whiteout is a traditional whiteout.
> > 
> > Previously, what happened is that I marked the whiteout file with
> > trusted.overlay.overlay.whiteout, and the /foo/lower/dir with
> > trusted.overlay.overlay.whiteouts.
> > 
> > Them when I mounted then entire chroot with overlayfs these xattrs
> > would get unescaped and I would get a $mnt/foo/lower/dir/whiteout
> > with
> > a trusted.overlay.whiteout xattr, and a $mnt/foo/lower/dir with a
> > trusted.overlay.whiteout. When I then mounted another overlayfs
> > with a
> > lowerdir of $mnt/foo/lower it would treat the whiteout as a
> > xwhiteout.
> > 
> > However, now I need the lowerdir toplevel dir to also have a
> > trusted.overlay.whiteouts xattr. But when I'm converting the entire
> > chroot I do not know which of the directories is going to be used
> > as
> > the toplevel lower dir, so I don't know where to put this marker.
> > 
> > The only solution I see is to put it on *all* parent directories.
> > Is
> > there a better approach here?
> > 
> 
> Alex,
> 
> As you can see, I posted v3 with an alternative approach that would
> not
> require marking all possible lower layer roots.
> 
> However, I cannot help wondering if it wouldn't be better practice,
> when
> composing layers, to always be explicit, per-directory about whether
> the
> composed directory is a "base" or a "diff" layer.
> 
> Isn't this information always known at composing time?

Currently, composefs images are not layered as such. They normally only
have one or more lowerdata layers, and then the actual image as a
single lowerdir, and on top of that an optional upper if you want some
kind of writability. 

But, when composing the composefs the content of the image is opaque to
us. We're just given a directory with some files in it for the image.
It might contain some other lowerdirs, but the details are not know to
us at compose time.

That said, I can see that in some cases it may make sense to use
multiple lower dirs, for example when using composefs for multi-layer
container images. When generating such layers we typically have the
lower levels available, so we could probably extract the base/dir
status for each directory.

> In legacy overlayfs, there is an explicit mark for "this is a base
> dir" -
> namely, the opaque xattr, but there is no such explicit mark on
> directories without an entry with the same name in layers below them.
> 
> The lack of explicit mark "merge" vs. "opaque" in all directories in
> all
> the layers had led to problems in the past, for example, this is the
> reason that this fix was needed:
> 
>   b79e05aaa166 ovl: no direct iteration for dir with origin xattr
> 
> In conclusion, since composefs is the first tool, that I know of, to
> compose "non-legacy" overlayfs layers (i.e. with overlay xattrs),
> I think the correct design decision would mark every directory in
> every layer explicitly as at exactly one of "merge"/"opaque".
> 
> Note that non-dir are always marked explicitly as "metacopy",
> so there is no ambiguity with non-dirs and we also error out
> if a non-dir stack does not end with an "opaque" entry.
> 
> Additionally, when composing layers, since all the children of
> a directory should be explicitly marked as "merge" vs. "opaque"
> then the parent's "impure" (meaning contains "merge" children)
> can also be set at composing time.
> 
> Failing to set "impure" correctly when composing layers could
> result in wrong readdir d_ino results.
> 
> My proposition in v3 for an explicit mark was to
> "reinterpret the opaque xattr from boolean to enum".
> My proposal included only the states 'y' (opaque) and 'x' (contains
> xwhiteouts), but for composefs, I would extend this to also mark
> a merge dir explicitly with opaque='n' and explicitly mark all the
> directories in a "base layer" with opaque='y'.
> 
> Implementation-wise, composefs could start by marking each directory
> with either 'y'/'n' state based on the lowerstack, and if xwhiteout
> entries
> are added, 'n' state could be changed to 'x' state.

We never add xwhiteout entries to the composefs, all directories in the
base layer would be overlay=n or y, and any directory containing an
escaped xwhiteout would have an escaped opaque xattr with 'x'.

> What do you think?

I feel it may be overkill, as most composefs images would be a one-
layer thing, and adding opaque=n to every directory in the lowermost
layer would just waste space and makes little sense (being in the
lowest layer means we already know if the directory is merged or not).
However, I think it may make sense to be able to mark non-lowest-layer
directories with either n or y.

> Does it make sense from composefs POV?
> Am I correct to assume that at composing time, every directory
> state is known (base 'y' vs. diff 'n')?
> 
> Thanks,
> Amir.
> 

-- 
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
=-=-=
 Alexander Larsson                                            Red Hat,
Inc 
       alexl@redhat.com            alexander.larsson@gmail.com 
He's a globe-trotting crooked cyborg in drag. She's an elegant 
streetsmart socialite prone to fits of savage, blood-crazed rage. They 
fight crime! 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] ovl: require xwhiteout feature flag on layer roots
  2024-01-22 10:44       ` Alexander Larsson
@ 2024-01-22 11:30         ` Amir Goldstein
  0 siblings, 0 replies; 14+ messages in thread
From: Amir Goldstein @ 2024-01-22 11:30 UTC (permalink / raw)
  To: Alexander Larsson; +Cc: Miklos Szeredi, linux-unionfs, linux-fsdevel, stable

> > Alex,
> >
> > As you can see, I posted v3 with an alternative approach that would
> > not
> > require marking all possible lower layer roots.
> >
> > However, I cannot help wondering if it wouldn't be better practice,
> > when
> > composing layers, to always be explicit, per-directory about whether
> > the
> > composed directory is a "base" or a "diff" layer.
> >
> > Isn't this information always known at composing time?
>
> Currently, composefs images are not layered as such. They normally only
> have one or more lowerdata layers, and then the actual image as a
> single lowerdir, and on top of that an optional upper if you want some
> kind of writability.
>
> But, when composing the composefs the content of the image is opaque to
> us. We're just given a directory with some files in it for the image.
> It might contain some other lowerdirs, but the details are not know to
> us at compose time.
>

Got it, though I may need you to explain this again to me next time ;)

If we were to change the tools that pack/extract overlayfs images to
mark directories more explicitly, we would need to change other tools,
not composefs.

composefs has no knowledge of the fact that it is packing an overlayfs
image, until it is asked to pack a whiteout or a file/dir that happens to
have an overlay.* xattr, but lower layers do not typically have to contain
files/dir with overlay.* xattrs.

> However, I think it may make sense to be able to mark non-lowest-layer
> directories with either n or y.

There is nothing stopping you from setting opaque=y on lowest layer dirs
or setting opaque=n on merge dirs when packing composefs.
Old kernels will not be bothered by these marks.

However, I forgot about the consideration of xattr lookup performance that
was the drive for erofs xattr bloom filter support.

I guess erofs will pack much smaller without those explicit annotations and
getxattr(opaque) will have better performance for no xattr case then with
opaque=n.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-01-22 11:30 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-19 10:14 [PATCH v2] ovl: require xwhiteout feature flag on layer roots Miklos Szeredi
2024-01-19 11:08 ` Amir Goldstein
2024-01-19 11:20   ` Miklos Szeredi
2024-01-19 12:19     ` Amir Goldstein
2024-01-19 12:29   ` Alexander Larsson
2024-01-19 14:12     ` Amir Goldstein
2024-01-19 16:35   ` Alexander Larsson
2024-01-19 19:06     ` Amir Goldstein
2024-01-19 20:29       ` Miklos Szeredi
2024-01-20 10:32         ` Amir Goldstein
2024-01-22 10:01           ` Alexander Larsson
2024-01-22  8:38     ` Amir Goldstein
2024-01-22 10:44       ` Alexander Larsson
2024-01-22 11:30         ` Amir Goldstein

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.