linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* dcache: abstract take_name_snapshot() interface
@ 2020-01-14 15:40 Amir Goldstein
  2020-01-14 16:22 ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2020-01-14 15:40 UTC (permalink / raw)
  To: Jan Kara; +Cc: Al Viro, linux-fsdevel

Generalize the take_name_snapshot()/release_name_snapshot() interface
so it is possible to snapshot either a dentry d_name or its snapshot.

The order of fields d_name and d_inode in struct dentry is swapped
so d_name is adjacent to d_iname.  This does not change struct size
nor cache lines alignment.

Currently, we snapshot the old name in vfs_rename() and we snapshot the
snapshot the dentry name in __fsnotify_parent() and then we pass qstr
to inotify which allocated a variable length event struct and copied the
name.

This new interface allows us to snapshot the name directly into an
fanotify event struct instead of allocating a variable length struct
and copying the name to it.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---

Jan,

This vfs patch is a pre-requisite to fanotify name event patches [1].

I wanted to get it out in advance for wider audience review and in the
hope that you or Al could pick up this patch for v5.6-rc1 in preparation
for the fanotify patches.

Al, any objections?

Thanks,
Amir.


[1] https://github.com/amir73il/linux/commits/fanotify_name

 fs/dcache.c            | 50 ++++++++++++++++++++++++++++++++----------
 fs/debugfs/inode.c     |  4 ++--
 fs/namei.c             |  2 +-
 fs/notify/fsnotify.c   |  2 +-
 fs/overlayfs/export.c  |  2 +-
 include/linux/dcache.h | 29 ++++++++++++++++++------
 6 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index b280e07e162b..a2b9ff77db18 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -259,9 +259,15 @@ struct external_name {
 	unsigned char name[];
 };
 
+static inline struct external_name *external_name_snap(
+					const struct name_snapshot *name)
+{
+	return container_of(name->name.name, struct external_name, name[0]);
+}
+
 static inline struct external_name *external_name(struct dentry *dentry)
 {
-	return container_of(dentry->d_name.name, struct external_name, name[0]);
+	return external_name_snap(&dentry->d_name_snap);
 }
 
 static void __d_free(struct rcu_head *head)
@@ -278,27 +284,49 @@ static void __d_free_external(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry);
 }
 
+static inline int name_snap_is_external(const struct name_snapshot *name)
+{
+	return name->name.name != name->inline_name;
+}
+
 static inline int dname_external(const struct dentry *dentry)
 {
-	return dentry->d_name.name != dentry->d_iname;
+	return name_snap_is_external(&dentry->d_name_snap);
 }
 
-void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+/*
+ * Snapshot either a dentry d_name or it's snapshot.  When snapshotting a dentry
+ * d_name, caller is responsible that d_name is stable.
+ */
+void take_name_snapshot(struct name_snapshot *name,
+			const struct name_snapshot *orig)
 {
-	spin_lock(&dentry->d_lock);
-	name->name = dentry->d_name;
-	if (unlikely(dname_external(dentry))) {
-		atomic_inc(&external_name(dentry)->u.count);
+	name->name = orig->name;
+	if (unlikely(name_snap_is_external(orig))) {
+		atomic_inc(&external_name_snap(orig)->u.count);
 	} else {
-		memcpy(name->inline_name, dentry->d_iname,
-		       dentry->d_name.len + 1);
+		memcpy(name->inline_name, orig->inline_name,
+		       orig->name.len + 1);
 		name->name.name = name->inline_name;
 	}
+}
+EXPORT_SYMBOL(take_name_snapshot);
+
+void take_dentry_name_snapshot(struct name_snapshot *name,
+			       struct dentry *dentry)
+{
+	BUILD_BUG_ON(offsetof(struct dentry, d_iname) !=
+		     offsetof(struct dentry, d_name_snap.inline_name));
+	BUILD_BUG_ON(sizeof_field(struct dentry, d_iname) !=
+		     sizeof_field(struct dentry, d_name_snap.inline_name));
+
+	spin_lock(&dentry->d_lock);
+	take_name_snapshot(name, &dentry->d_name_snap);
 	spin_unlock(&dentry->d_lock);
 }
 EXPORT_SYMBOL(take_dentry_name_snapshot);
 
-void release_dentry_name_snapshot(struct name_snapshot *name)
+void release_name_snapshot(struct name_snapshot *name)
 {
 	if (unlikely(name->name.name != name->inline_name)) {
 		struct external_name *p;
@@ -307,7 +335,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 			kfree_rcu(p, u.head);
 	}
 }
-EXPORT_SYMBOL(release_dentry_name_snapshot);
+EXPORT_SYMBOL(release_name_snapshot);
 
 static inline void __d_set_inode_and_type(struct dentry *dentry,
 					  struct inode *inode,
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f4d8df5e4714..149128da7c4c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -859,14 +859,14 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 	error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
 			      dentry, 0);
 	if (error) {
-		release_dentry_name_snapshot(&old_name);
+		release_name_snapshot(&old_name);
 		goto exit;
 	}
 	d_move(old_dentry, dentry);
 	fsnotify_move(d_inode(old_dir), d_inode(new_dir), &old_name.name,
 		d_is_dir(old_dentry),
 		NULL, old_dentry);
-	release_dentry_name_snapshot(&old_name);
+	release_name_snapshot(&old_name);
 	unlock_rename(new_dir, old_dir);
 	dput(dentry);
 	return old_dentry;
diff --git a/fs/namei.c b/fs/namei.c
index d6c91d1e88cb..de9cb22a95b0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4511,7 +4511,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				      new_is_dir, NULL, new_dentry);
 		}
 	}
-	release_dentry_name_snapshot(&old_name);
+	release_name_snapshot(&old_name);
 
 	return error;
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index a8b281569bbf..b7665b0e7edd 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -173,7 +173,7 @@ int fsnotify_parent(__u32 mask, const void *data, int data_type)
 
 		take_dentry_name_snapshot(&name, dentry);
 		ret = fsnotify(p_inode, mask, data, data_type, &name.name, 0);
-		release_dentry_name_snapshot(&name);
+		release_name_snapshot(&name);
 	}
 
 	dput(parent);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 70e55588aedc..fd2856075373 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -400,7 +400,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected,
 	}
 
 out:
-	release_dentry_name_snapshot(&name);
+	release_name_snapshot(&name);
 	dput(parent);
 	inode_unlock(dir);
 	return this;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index c1488cc84fd9..8aebca3830a5 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -86,16 +86,34 @@ extern struct dentry_stat_t dentry_stat;
 
 #define d_lock	d_lockref.lock
 
+struct name_snapshot {
+	struct qstr name;
+	unsigned char inline_name[DNAME_INLINE_LEN];
+};
+
 struct dentry {
 	/* RCU lookup touched fields */
 	unsigned int d_flags;		/* protected by d_lock */
 	seqcount_t d_seq;		/* per dentry seqlock */
 	struct hlist_bl_node d_hash;	/* lookup hash list */
 	struct dentry *d_parent;	/* parent directory */
-	struct qstr d_name;
 	struct inode *d_inode;		/* Where the name belongs to - NULL is
 					 * negative */
-	unsigned char d_iname[DNAME_INLINE_LEN];	/* small names */
+	union {
+		struct name_snapshot d_name_snap;
+		/*
+		 * Unfolded replica of the above struct instead of:
+		 *
+		 * #define d_name d_name_snap.name
+		 *
+		 * which isn't popssible anyway because d_name keyword is also
+		 * in use by coda and msdos structs in uapi.
+		 */
+		struct {
+			struct qstr d_name;
+			unsigned char d_iname[DNAME_INLINE_LEN];
+		};
+	};
 
 	/* Ref lookup also touches following */
 	struct lockref d_lockref;	/* per-dentry lock and refcount */
@@ -596,11 +614,8 @@ static inline struct inode *d_real_inode(const struct dentry *dentry)
 	return d_backing_inode(d_real((struct dentry *) dentry, NULL));
 }
 
-struct name_snapshot {
-	struct qstr name;
-	unsigned char inline_name[DNAME_INLINE_LEN];
-};
 void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *);
-void release_dentry_name_snapshot(struct name_snapshot *);
+void take_name_snapshot(struct name_snapshot *, const struct name_snapshot *);
+void release_name_snapshot(struct name_snapshot *);
 
 #endif	/* __LINUX_DCACHE_H */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 15:40 dcache: abstract take_name_snapshot() interface Amir Goldstein
@ 2020-01-14 16:22 ` Al Viro
  2020-01-14 18:06   ` Amir Goldstein
  2020-01-15  5:51   ` Amir Goldstein
  0 siblings, 2 replies; 9+ messages in thread
From: Al Viro @ 2020-01-14 16:22 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 05:40:34PM +0200, Amir Goldstein wrote:
> Generalize the take_name_snapshot()/release_name_snapshot() interface
> so it is possible to snapshot either a dentry d_name or its snapshot.
> 
> The order of fields d_name and d_inode in struct dentry is swapped
> so d_name is adjacent to d_iname.  This does not change struct size
> nor cache lines alignment.
> 
> Currently, we snapshot the old name in vfs_rename() and we snapshot the
> snapshot the dentry name in __fsnotify_parent() and then we pass qstr
> to inotify which allocated a variable length event struct and copied the
> name.
> 
> This new interface allows us to snapshot the name directly into an
> fanotify event struct instead of allocating a variable length struct
> and copying the name to it.

Ugh...  That looks like being too damn cute for no good reason.  That
trick with union is _probably_ safe, but I wouldn't bet a dime on
e.g. randomize_layout crap not screwing it over in random subset of
gcc versions.  You are relying upon fairly subtle reading of 6.2.7
and it feels like just the place for layout-mangling plugins to fuck
up.

With -fplan9-extensions we could go for renaming struct name_snapshot
fields and using an anon member in struct dentry -
	...
	struct inode *d_inode;
	struct name_snapshot;	// d_name and d_iname
	struct lockref d_lockref;
	...

but IMO it's much safer to have an explicit

// NOTE: release_dentry_name_snapshot() will be needed for both copies.
clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
{
	to->name = from->name;
	if (likely(to->name.name == from->inline_name)) {
		memcpy(to->inline_name, from->inline_name,
			to->name.len);
		to->name.name = to->inline_name;
	} else {
                struct external_name *p;
                p = container_of(to->name.name, struct external_name, name[0]);
		atomic_inc(&p->u.count);
	}
}

and be done with that.  Avoids any extensions or tree-wide renamings, etc.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 16:22 ` Al Viro
@ 2020-01-14 18:06   ` Amir Goldstein
  2020-01-14 19:19     ` Al Viro
  2020-01-15  5:51   ` Amir Goldstein
  1 sibling, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2020-01-14 18:06 UTC (permalink / raw)
  To: Al Viro; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 6:22 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Tue, Jan 14, 2020 at 05:40:34PM +0200, Amir Goldstein wrote:
> > Generalize the take_name_snapshot()/release_name_snapshot() interface
> > so it is possible to snapshot either a dentry d_name or its snapshot.
> >
> > The order of fields d_name and d_inode in struct dentry is swapped
> > so d_name is adjacent to d_iname.  This does not change struct size
> > nor cache lines alignment.
> >
> > Currently, we snapshot the old name in vfs_rename() and we snapshot the
> > snapshot the dentry name in __fsnotify_parent() and then we pass qstr
> > to inotify which allocated a variable length event struct and copied the
> > name.
> >
> > This new interface allows us to snapshot the name directly into an
> > fanotify event struct instead of allocating a variable length struct
> > and copying the name to it.
>
> Ugh...  That looks like being too damn cute for no good reason.  That
> trick with union is _probably_ safe, but I wouldn't bet a dime on
> e.g. randomize_layout crap not screwing it over in random subset of
> gcc versions.  You are relying upon fairly subtle reading of 6.2.7
> and it feels like just the place for layout-mangling plugins to fuck
> up.
>
> With -fplan9-extensions we could go for renaming struct name_snapshot
> fields and using an anon member in struct dentry -
>         ...
>         struct inode *d_inode;
>         struct name_snapshot;   // d_name and d_iname
>         struct lockref d_lockref;
>         ...
>
> but IMO it's much safer to have an explicit
>
> // NOTE: release_dentry_name_snapshot() will be needed for both copies.
> clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
> {
>         to->name = from->name;
>         if (likely(to->name.name == from->inline_name)) {
>                 memcpy(to->inline_name, from->inline_name,
>                         to->name.len);
>                 to->name.name = to->inline_name;
>         } else {
>                 struct external_name *p;
>                 p = container_of(to->name.name, struct external_name, name[0]);
>                 atomic_inc(&p->u.count);
>         }
> }
>
> and be done with that.  Avoids any extensions or tree-wide renamings, etc.

I started with something like this but than in one of the early
revisions I needed
to pass some abstract reference around before cloning the name into the event,
but with my current patches I can get away with a simple:

if (data_type == FANOTIFY_EVENT_NAME)
    clone_name_snapshot(&event->name, data);
else if (dentry)
    take_dentry_name_snapshot(&event->name, dentry);

So that simple interface should be good enough for my needs.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 18:06   ` Amir Goldstein
@ 2020-01-14 19:19     ` Al Viro
  2020-01-14 19:58       ` Amir Goldstein
  0 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2020-01-14 19:19 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 08:06:56PM +0200, Amir Goldstein wrote:
> > // NOTE: release_dentry_name_snapshot() will be needed for both copies.
> > clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
> > {
> >         to->name = from->name;
> >         if (likely(to->name.name == from->inline_name)) {
> >                 memcpy(to->inline_name, from->inline_name,
> >                         to->name.len);
> >                 to->name.name = to->inline_name;
> >         } else {
> >                 struct external_name *p;
> >                 p = container_of(to->name.name, struct external_name, name[0]);
> >                 atomic_inc(&p->u.count);
> >         }
> > }
> >
> > and be done with that.  Avoids any extensions or tree-wide renamings, etc.
> 
> I started with something like this but than in one of the early
> revisions I needed
> to pass some abstract reference around before cloning the name into the event,
> but with my current patches I can get away with a simple:
> 
> if (data_type == FANOTIFY_EVENT_NAME)
>     clone_name_snapshot(&event->name, data);
> else if (dentry)
>     take_dentry_name_snapshot(&event->name, dentry);
> 
> So that simple interface should be good enough for my needs.

I really think it would be safer that way; do you want me to throw that into
vfs.git (#work.dcache, perhaps)?  I don't have anything going on in the
vicinity, so it's not likely to cause conflicts either way and nothing I'd
seen posted on fsdevel seems to be likely to step into it, IIRC, so...
Up to you.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 19:19     ` Al Viro
@ 2020-01-14 19:58       ` Amir Goldstein
  2020-01-15  0:03         ` Amir Goldstein
  0 siblings, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2020-01-14 19:58 UTC (permalink / raw)
  To: Al Viro; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 9:19 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Tue, Jan 14, 2020 at 08:06:56PM +0200, Amir Goldstein wrote:
> > > // NOTE: release_dentry_name_snapshot() will be needed for both copies.
> > > clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
> > > {
> > >         to->name = from->name;
> > >         if (likely(to->name.name == from->inline_name)) {
> > >                 memcpy(to->inline_name, from->inline_name,
> > >                         to->name.len);
> > >                 to->name.name = to->inline_name;
> > >         } else {
> > >                 struct external_name *p;
> > >                 p = container_of(to->name.name, struct external_name, name[0]);
> > >                 atomic_inc(&p->u.count);
> > >         }
> > > }
> > >
> > > and be done with that.  Avoids any extensions or tree-wide renamings, etc.
> >
> > I started with something like this but than in one of the early
> > revisions I needed
> > to pass some abstract reference around before cloning the name into the event,
> > but with my current patches I can get away with a simple:
> >
> > if (data_type == FANOTIFY_EVENT_NAME)
> >     clone_name_snapshot(&event->name, data);
> > else if (dentry)
> >     take_dentry_name_snapshot(&event->name, dentry);
> >
> > So that simple interface should be good enough for my needs.
>
> I really think it would be safer that way; do you want me to throw that into
> vfs.git (#work.dcache, perhaps)?  I don't have anything going on in the
> vicinity, so it's not likely to cause conflicts either way and nothing I'd
> seen posted on fsdevel seems to be likely to step into it, IIRC, so...
> Up to you.

Sure, that would be great.

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 19:58       ` Amir Goldstein
@ 2020-01-15  0:03         ` Amir Goldstein
  2020-01-15  0:09           ` Al Viro
  0 siblings, 1 reply; 9+ messages in thread
From: Amir Goldstein @ 2020-01-15  0:03 UTC (permalink / raw)
  To: Al Viro; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 9:58 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Jan 14, 2020 at 9:19 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Tue, Jan 14, 2020 at 08:06:56PM +0200, Amir Goldstein wrote:
> > > > // NOTE: release_dentry_name_snapshot() will be needed for both copies.
> > > > clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
> > > > {
> > > >         to->name = from->name;
> > > >         if (likely(to->name.name == from->inline_name)) {
> > > >                 memcpy(to->inline_name, from->inline_name,
> > > >                         to->name.len);
> > > >                 to->name.name = to->inline_name;
> > > >         } else {
> > > >                 struct external_name *p;
> > > >                 p = container_of(to->name.name, struct external_name, name[0]);
> > > >                 atomic_inc(&p->u.count);
> > > >         }
> > > > }
> > > >
> > > > and be done with that.  Avoids any extensions or tree-wide renamings, etc.
> > >
> > > I started with something like this but than in one of the early
> > > revisions I needed
> > > to pass some abstract reference around before cloning the name into the event,
> > > but with my current patches I can get away with a simple:
> > >
> > > if (data_type == FANOTIFY_EVENT_NAME)
> > >     clone_name_snapshot(&event->name, data);
> > > else if (dentry)
> > >     take_dentry_name_snapshot(&event->name, dentry);
> > >
> > > So that simple interface should be good enough for my needs.
> >
> > I really think it would be safer that way; do you want me to throw that into
> > vfs.git (#work.dcache, perhaps)?  I don't have anything going on in the
> > vicinity, so it's not likely to cause conflicts either way and nothing I'd
> > seen posted on fsdevel seems to be likely to step into it, IIRC, so...
> > Up to you.
>
> Sure, that would be great.
>

Only problem I forgot about is that I need to propagate name_snapshot
into fsnotify_move() instead of qstr (in order to snapshot it).
That means I will need to snapshot also new_dentry in vfs_rename(), so
I have a name_snapshot to pass into the RENAME_EXCHANGE
fsnotify_move() hook.

My current patch passes the cute &old_dentry->d_name_snap abstract
to fsnotify_move().

What shall I do about that?

        take_dentry_name_snapshot(&new_name, new_dentry);
???

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-15  0:03         ` Amir Goldstein
@ 2020-01-15  0:09           ` Al Viro
  2020-01-15  5:44             ` Amir Goldstein
  0 siblings, 1 reply; 9+ messages in thread
From: Al Viro @ 2020-01-15  0:09 UTC (permalink / raw)
  To: Amir Goldstein; +Cc: Jan Kara, linux-fsdevel

On Wed, Jan 15, 2020 at 02:03:35AM +0200, Amir Goldstein wrote:

> Only problem I forgot about is that I need to propagate name_snapshot
> into fsnotify_move() instead of qstr (in order to snapshot it).
> That means I will need to snapshot also new_dentry in vfs_rename(), so
> I have a name_snapshot to pass into the RENAME_EXCHANGE
> fsnotify_move() hook.
> 
> My current patch passes the cute &old_dentry->d_name_snap abstract
> to fsnotify_move().
> 
> What shall I do about that?
> 
>         take_dentry_name_snapshot(&new_name, new_dentry);
> ???

Wait a sec...  How long is that thing going to be using the snapshot?
And I bloody well hope that this is _not_ going to be unconditional -
having each rename() pay for *notify is a bad idea, for obvious
reasons.  Details, please...

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-15  0:09           ` Al Viro
@ 2020-01-15  5:44             ` Amir Goldstein
  0 siblings, 0 replies; 9+ messages in thread
From: Amir Goldstein @ 2020-01-15  5:44 UTC (permalink / raw)
  To: Al Viro; +Cc: Jan Kara, linux-fsdevel

On Wed, Jan 15, 2020 at 2:09 AM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Wed, Jan 15, 2020 at 02:03:35AM +0200, Amir Goldstein wrote:
>
> > Only problem I forgot about is that I need to propagate name_snapshot
> > into fsnotify_move() instead of qstr (in order to snapshot it).
> > That means I will need to snapshot also new_dentry in vfs_rename(), so
> > I have a name_snapshot to pass into the RENAME_EXCHANGE
> > fsnotify_move() hook.
> >
> > My current patch passes the cute &old_dentry->d_name_snap abstract
> > to fsnotify_move().
> >
> > What shall I do about that?
> >
> >         take_dentry_name_snapshot(&new_name, new_dentry);
> > ???
>
> Wait a sec...  How long is that thing going to be using the snapshot?
> And I bloody well hope that this is _not_ going to be unconditional -
> having each rename() pay for *notify is a bad idea, for obvious
> reasons.  Details, please...

Well, if nobody asked to get notifications about entry name changes,
the snapshot is going to stay alive until fsnotify(), same as old_name
with current code.

The addition of new_name snapshot is only needed for the
RENAME_EXCHANGE case and will be conditional to the flag.

If anyone asked for notifications about entry name changes via the new
FAN_DIR_MODIFY [1][2] event, then event will be queued with a clone
of the name_snapshot that will live until user reads the event.

This is similar to the way that fanotify events keep the dentry refcount
elevated until user reads an event (for reporting object fd)

The advantage of snapshotting the name instead of copy, beyond the
obvious is that events can be allocated from a dedicated kmem_cache
and avoid spurious small variable sized allocations.

An argument in favor of allocating a variable size buffer by fanotify to
copy the name is that fanotify accounts event allocations to the group->memcg
of the watcher, so the risk of DoS on the system gets smaller.
One of the less advertised improvements of FAN_REPORT_FID in v5.1
is that events do not keep an elevated refcount on dentries, the way that
they do with 'legacy' fanotify (reporting fd as object id).

FAN_REPORT_FID_NAME will be a little step back in that sense if it
holds a reference on the name.

As you can see, I am not fixed into snapshot or copy the name.
Looking for opinions is one of the reasons I posted this early bird.

Thanks,
Amir.

[1] https://lore.kernel.org/linux-fsdevel/20200108120422.GD20521@quack2.suse.cz/
[2] https://github.com/amir73il/linux/commits/fanotify_name

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: dcache: abstract take_name_snapshot() interface
  2020-01-14 16:22 ` Al Viro
  2020-01-14 18:06   ` Amir Goldstein
@ 2020-01-15  5:51   ` Amir Goldstein
  1 sibling, 0 replies; 9+ messages in thread
From: Amir Goldstein @ 2020-01-15  5:51 UTC (permalink / raw)
  To: Al Viro; +Cc: Jan Kara, linux-fsdevel

On Tue, Jan 14, 2020 at 6:22 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Tue, Jan 14, 2020 at 05:40:34PM +0200, Amir Goldstein wrote:
> > Generalize the take_name_snapshot()/release_name_snapshot() interface
> > so it is possible to snapshot either a dentry d_name or its snapshot.
> >
> > The order of fields d_name and d_inode in struct dentry is swapped
> > so d_name is adjacent to d_iname.  This does not change struct size
> > nor cache lines alignment.
> >
> > Currently, we snapshot the old name in vfs_rename() and we snapshot the
> > snapshot the dentry name in __fsnotify_parent() and then we pass qstr
> > to inotify which allocated a variable length event struct and copied the
> > name.
> >
> > This new interface allows us to snapshot the name directly into an
> > fanotify event struct instead of allocating a variable length struct
> > and copying the name to it.
>
> Ugh...  That looks like being too damn cute for no good reason.  That
> trick with union is _probably_ safe, but I wouldn't bet a dime on
> e.g. randomize_layout crap not screwing it over in random subset of
> gcc versions.  You are relying upon fairly subtle reading of 6.2.7
> and it feels like just the place for layout-mangling plugins to fuck
> up.
>
> With -fplan9-extensions we could go for renaming struct name_snapshot
> fields and using an anon member in struct dentry -
>         ...
>         struct inode *d_inode;
>         struct name_snapshot;   // d_name and d_iname
>         struct lockref d_lockref;
>         ...
>
> but IMO it's much safer to have an explicit
>
> // NOTE: release_dentry_name_snapshot() will be needed for both copies.
> clone_name_snapshot(struct name_snapshot *to, const struct name_snapshot *from)
> {
>         to->name = from->name;
>         if (likely(to->name.name == from->inline_name)) {
>                 memcpy(to->inline_name, from->inline_name,
>                         to->name.len);

to->name.len + 1);

>                 to->name.name = to->inline_name;
>         } else {
>                 struct external_name *p;
>                 p = container_of(to->name.name, struct external_name, name[0]);
>                 atomic_inc(&p->u.count);
>         }
> }
>

Thanks,
Amir.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-01-15  5:51 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-14 15:40 dcache: abstract take_name_snapshot() interface Amir Goldstein
2020-01-14 16:22 ` Al Viro
2020-01-14 18:06   ` Amir Goldstein
2020-01-14 19:19     ` Al Viro
2020-01-14 19:58       ` Amir Goldstein
2020-01-15  0:03         ` Amir Goldstein
2020-01-15  0:09           ` Al Viro
2020-01-15  5:44             ` Amir Goldstein
2020-01-15  5:51   ` Amir Goldstein

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).