linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] kernfs: add exportfs operations
@ 2017-05-22 22:53 Shaohua Li
  2017-05-22 22:53 ` [PATCH 1/5] kernfs: implement i_generation Shaohua Li
                   ` (6 more replies)
  0 siblings, 7 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

Hi,

The goal isn't to export kernfs to NFS. The intention is to make tracing cgroup
aware. To do this, tracing will record an id for cgroup and use the id to find
cgroup name later. The best id is the cgroup directory inode number. Further to
filter out stale cgroup directory, fhandle is the best to identify a cgroup. So
this is what this series try to do.

Thanks,
Shaohua

Shaohua Li (5):
  kernfs: implement i_generation
  kernfs: use idr instead of ida to manage inode number
  kernfs: add an API to get kernfs node from inode number
  kernfs: don't set dentry->d_fsdata
  kernfs: add exportfs operations

 fs/kernfs/dir.c             | 76 +++++++++++++++++++++++++++++++++------------
 fs/kernfs/file.c            |  6 ++--
 fs/kernfs/inode.c           |  7 +++--
 fs/kernfs/kernfs-internal.h |  4 +++
 fs/kernfs/mount.c           | 67 ++++++++++++++++++++++++++++++++++-----
 fs/kernfs/symlink.c         |  2 +-
 include/linux/kernfs.h      |  4 ++-
 7 files changed, 131 insertions(+), 35 deletions(-)

-- 
2.9.3

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 1/5] kernfs: implement i_generation
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
@ 2017-05-22 22:53 ` Shaohua Li
  2017-05-23  7:41   ` Christoph Hellwig
  2017-05-22 22:53 ` [PATCH 2/5] kernfs: use idr instead of ida to manage inode number Shaohua Li
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

Set i_generation for kernfs inod. This is required to implement exportfs
operations.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/dir.c        | 2 ++
 fs/kernfs/inode.c      | 1 +
 include/linux/kernfs.h | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index db5900aaa..09d093e 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -634,6 +634,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	if (ret < 0)
 		goto err_out2;
 	kn->ino = ret;
+	kn->generation = atomic_inc_return(&root->next_generation);
 
 	atomic_set(&kn->count, 1);
 	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
@@ -877,6 +878,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 
 	ida_init(&root->ino_ida);
 	INIT_LIST_HEAD(&root->supers);
+	atomic_set(&root->next_generation, 0);
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
 			       KERNFS_DIR);
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index fb4b4a7..79cdae4 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -220,6 +220,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
 	inode->i_private = kn;
 	inode->i_mapping->a_ops = &kernfs_aops;
 	inode->i_op = &kernfs_iops;
+	inode->i_generation = kn->generation;
 
 	set_default_inode_attr(inode, kn->mode);
 	kernfs_refresh_inode(kn, inode);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index a9b11b8..c5f0fa7 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -135,6 +135,7 @@ struct kernfs_node {
 	umode_t			mode;
 	unsigned int		ino;
 	struct kernfs_iattrs	*iattr;
+	u32			generation;
 };
 
 /*
@@ -170,6 +171,7 @@ struct kernfs_root {
 	struct list_head	supers;
 
 	wait_queue_head_t	deactivate_waitq;
+	atomic_t		next_generation;
 };
 
 struct kernfs_open_file {
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 2/5] kernfs: use idr instead of ida to manage inode number
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
  2017-05-22 22:53 ` [PATCH 1/5] kernfs: implement i_generation Shaohua Li
@ 2017-05-22 22:53 ` Shaohua Li
  2017-05-22 22:53 ` [PATCH 3/5] kernfs: add an API to get kernfs node from " Shaohua Li
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

kernfs uses ida to manage inode number. The problem is we can't get
kernfs_node from inode number with ida. Switching to use idr, next patch
will add an API to get kernfs_node from inode number.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/dir.c        | 17 ++++++++++++-----
 include/linux/kernfs.h |  2 +-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 09d093e..8e8545a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -21,6 +21,7 @@
 DEFINE_MUTEX(kernfs_mutex);
 static DEFINE_SPINLOCK(kernfs_rename_lock);	/* kn->parent and ->name */
 static char kernfs_pr_cont_buf[PATH_MAX];	/* protected by rename_lock */
+static DEFINE_SPINLOCK(kernfs_idr_lock);	/* root->ino_idr */
 
 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
 
@@ -533,7 +534,9 @@ void kernfs_put(struct kernfs_node *kn)
 		simple_xattrs_free(&kn->iattr->xattrs);
 	}
 	kfree(kn->iattr);
-	ida_simple_remove(&root->ino_ida, kn->ino);
+	spin_lock(&kernfs_idr_lock);
+	idr_remove(&root->ino_idr, kn->ino);
+	spin_unlock(&kernfs_idr_lock);
 	kmem_cache_free(kernfs_node_cache, kn);
 
 	kn = parent;
@@ -542,7 +545,7 @@ void kernfs_put(struct kernfs_node *kn)
 			goto repeat;
 	} else {
 		/* just released the root kn, free @root too */
-		ida_destroy(&root->ino_ida);
+		idr_destroy(&root->ino_idr);
 		kfree(root);
 	}
 }
@@ -630,7 +633,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	if (!kn)
 		goto err_out1;
 
-	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+	idr_preload(GFP_KERNEL);
+	spin_lock(&kernfs_idr_lock);
+	ret = idr_alloc(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
+	spin_unlock(&kernfs_idr_lock);
+	idr_preload_end();
 	if (ret < 0)
 		goto err_out2;
 	kn->ino = ret;
@@ -876,14 +883,14 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 	if (!root)
 		return ERR_PTR(-ENOMEM);
 
-	ida_init(&root->ino_ida);
+	idr_init(&root->ino_idr);
 	INIT_LIST_HEAD(&root->supers);
 	atomic_set(&root->next_generation, 0);
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
 			       KERNFS_DIR);
 	if (!kn) {
-		ida_destroy(&root->ino_ida);
+		idr_destroy(&root->ino_idr);
 		kfree(root);
 		return ERR_PTR(-ENOMEM);
 	}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index c5f0fa7..61668d1 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -164,7 +164,7 @@ struct kernfs_root {
 	unsigned int		flags;	/* KERNFS_ROOT_* flags */
 
 	/* private fields, do not use outside kernfs proper */
-	struct ida		ino_ida;
+	struct idr		ino_idr;
 	struct kernfs_syscall_ops *syscall_ops;
 
 	/* list of kernfs_super_info of this root, protected by kernfs_mutex */
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 3/5] kernfs: add an API to get kernfs node from inode number
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
  2017-05-22 22:53 ` [PATCH 1/5] kernfs: implement i_generation Shaohua Li
  2017-05-22 22:53 ` [PATCH 2/5] kernfs: use idr instead of ida to manage inode number Shaohua Li
@ 2017-05-22 22:53 ` Shaohua Li
  2017-05-22 22:53 ` [PATCH 4/5] kernfs: don't set dentry->d_fsdata Shaohua Li
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

Add an API to get kernfs node from inode number. We will need this to
implement exportfs operations.

To make the API lock free, kernfs node is freed in RCU context. And we
depend on kernfs_node count/ino number to filter stale kernfs nodes.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/dir.c             | 35 +++++++++++++++++++++++++++++++++++
 fs/kernfs/kernfs-internal.h |  2 ++
 fs/kernfs/mount.c           |  4 +++-
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 8e8545a..4c86e4c 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -643,6 +643,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	kn->ino = ret;
 	kn->generation = atomic_inc_return(&root->next_generation);
 
+	/* set ino first. Above atomic_inc_return has a barrier */
 	atomic_set(&kn->count, 1);
 	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
 	RB_CLEAR_NODE(&kn->rb);
@@ -674,6 +675,40 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 	return kn;
 }
 
+/*
+ * kernfs_get_node_by_ino - get kernfs_node from inode number
+ * @root: the kernfs root
+ * @ino: inode number
+ *
+ * RETURNS:
+ * NULL on failure. Return a kernfs node with reference counter incremented
+ */
+struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
+					   unsigned int ino)
+{
+	struct kernfs_node *kn;
+
+	rcu_read_lock();
+	kn = idr_find(&root->ino_idr, ino);
+	if (!kn)
+		goto out;
+	/* kernfs_put removes the ino after count is 0 */
+	if (!atomic_inc_not_zero(&kn->count)) {
+		kn = NULL;
+		goto out;
+	}
+	/* If this node is reused, __kernfs_new_node sets ino before count */
+	if (kn->ino != ino)
+		goto out;
+	rcu_read_unlock();
+
+	return kn;
+out:
+	rcu_read_unlock();
+	kernfs_put(kn);
+	return NULL;
+}
+
 /**
  *	kernfs_add_one - add kernfs_node to parent without warning
  *	@kn: kernfs_node to be added
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 2d5144a..3534cfe 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -98,6 +98,8 @@ int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
 				    unsigned flags);
+struct kernfs_node *kernfs_get_node_by_ino(struct kernfs_root *root,
+					   unsigned int ino);
 
 /*
  * file.c
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d5b149a..343dfeb 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -332,5 +332,7 @@ void __init kernfs_init(void)
 {
 	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
 					      sizeof(struct kernfs_node),
-					      0, SLAB_PANIC, NULL);
+					      0,
+					      SLAB_PANIC | SLAB_TYPESAFE_BY_RCU,
+					      NULL);
 }
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 4/5] kernfs: don't set dentry->d_fsdata
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
                   ` (2 preceding siblings ...)
  2017-05-22 22:53 ` [PATCH 3/5] kernfs: add an API to get kernfs node from " Shaohua Li
@ 2017-05-22 22:53 ` Shaohua Li
  2017-05-23 18:37   ` Tejun Heo
  2017-05-22 22:53 ` [PATCH 5/5] kernfs: add exportfs operations Shaohua Li
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

When working on adding exportfs operations in kernfs, I found it's hard
to initialize dentry->d_fsdata in the exportfs operations. Looks there
is no way to do it without race condition. Look at the kernfs code
closely, there is no point to set dentry->d_fsdata. inode->i_private
already points to kernfs_node, and we can get inode from a dentry. So
this patch just delete the d_fsdata usage.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/dir.c             | 22 +++++++---------------
 fs/kernfs/file.c            |  6 +++---
 fs/kernfs/inode.c           |  6 +++---
 fs/kernfs/kernfs-internal.h |  2 ++
 fs/kernfs/mount.c           |  8 ++------
 fs/kernfs/symlink.c         |  2 +-
 6 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 4c86e4c..d5721c8 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -562,7 +562,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	if (d_really_is_negative(dentry))
 		goto out_bad_unlocked;
 
-	kn = dentry->d_fsdata;
+	kn = kernfs_dentry_node(dentry);
 	mutex_lock(&kernfs_mutex);
 
 	/* The kernfs node has been deactivated */
@@ -570,7 +570,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 		goto out_bad;
 
 	/* The kernfs node has been moved? */
-	if (dentry->d_parent->d_fsdata != kn->parent)
+	if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
 		goto out_bad;
 
 	/* The kernfs node has been renamed */
@@ -590,14 +590,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	return 0;
 }
 
-static void kernfs_dop_release(struct dentry *dentry)
-{
-	kernfs_put(dentry->d_fsdata);
-}
-
 const struct dentry_operations kernfs_dops = {
 	.d_revalidate	= kernfs_dop_revalidate,
-	.d_release	= kernfs_dop_release,
 };
 
 /**
@@ -614,7 +608,7 @@ const struct dentry_operations kernfs_dops = {
 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 {
 	if (dentry->d_sb->s_op == &kernfs_sops)
-		return dentry->d_fsdata;
+		return kernfs_dentry_node(dentry);
 	return NULL;
 }
 
@@ -1028,7 +1022,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
 					unsigned int flags)
 {
 	struct dentry *ret;
-	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
+	struct kernfs_node *parent = kernfs_dentry_node(dentry->d_parent);
 	struct kernfs_node *kn;
 	struct inode *inode;
 	const void *ns = NULL;
@@ -1045,8 +1039,6 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
 		ret = NULL;
 		goto out_unlock;
 	}
-	kernfs_get(kn);
-	dentry->d_fsdata = kn;
 
 	/* attach dentry and inode */
 	inode = kernfs_get_inode(dir->i_sb, kn);
@@ -1083,7 +1075,7 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
 
 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	struct kernfs_node *kn  = dentry->d_fsdata;
+	struct kernfs_node *kn  = kernfs_dentry_node(dentry);
 	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 	int ret;
 
@@ -1103,7 +1095,7 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
 			     struct inode *new_dir, struct dentry *new_dentry,
 			     unsigned int flags)
 {
-	struct kernfs_node *kn  = old_dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
 	struct kernfs_node *new_parent = new_dir->i_private;
 	struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 	int ret;
@@ -1616,7 +1608,7 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct dentry *dentry = file->f_path.dentry;
-	struct kernfs_node *parent = dentry->d_fsdata;
+	struct kernfs_node *parent = kernfs_dentry_node(dentry);
 	struct kernfs_node *pos = file->private_data;
 	const void *ns = NULL;
 
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index ac2dfe0..4a795ba 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -616,7 +616,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
 
 static int kernfs_fop_open(struct inode *inode, struct file *file)
 {
-	struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(file->f_path.dentry);
 	struct kernfs_root *root = kernfs_root(kn);
 	const struct kernfs_ops *ops;
 	struct kernfs_open_file *of;
@@ -768,7 +768,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
 
 static int kernfs_fop_release(struct inode *inode, struct file *filp)
 {
-	struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
 	struct kernfs_open_file *of = kernfs_of(filp);
 
 	if (kn->flags & KERNFS_HAS_RELEASE) {
@@ -835,7 +835,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
 static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
 {
 	struct kernfs_open_file *of = kernfs_of(filp);
-	struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
 	struct kernfs_open_node *on = kn->attr.open;
 
 	if (!kernfs_get_active(kn))
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 79cdae4..4c8b510 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -112,7 +112,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = d_inode(dentry);
-	struct kernfs_node *kn = dentry->d_fsdata;
+	struct kernfs_node *kn = inode->i_private;
 	int error;
 
 	if (!kn)
@@ -154,7 +154,7 @@ static int kernfs_node_setsecdata(struct kernfs_iattrs *attrs, void **secdata,
 
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
 {
-	struct kernfs_node *kn = dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(dentry);
 	struct kernfs_iattrs *attrs;
 
 	attrs = kernfs_iattrs(kn);
@@ -203,8 +203,8 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
 int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
 		       u32 request_mask, unsigned int query_flags)
 {
-	struct kernfs_node *kn = path->dentry->d_fsdata;
 	struct inode *inode = d_inode(path->dentry);
+	struct kernfs_node *kn = inode->i_private;
 
 	mutex_lock(&kernfs_mutex);
 	kernfs_refresh_inode(kn, inode);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3534cfe..82e11fa 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -70,6 +70,8 @@ struct kernfs_super_info {
 };
 #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
 
+#define kernfs_dentry_node(d)  ((d_inode(d))->i_private)
+
 extern const struct super_operations kernfs_sops;
 extern struct kmem_cache *kernfs_node_cache;
 
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 343dfeb..462a40c 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -33,7 +33,7 @@ static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
 
 static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
 {
-	struct kernfs_root *root = kernfs_root(dentry->d_fsdata);
+	struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
 	struct kernfs_syscall_ops *scops = root->syscall_ops;
 
 	if (scops && scops->show_options)
@@ -43,7 +43,7 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
 
 static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
 {
-	struct kernfs_node *node = dentry->d_fsdata;
+	struct kernfs_node *node = kernfs_dentry_node(dentry);
 	struct kernfs_root *root = kernfs_root(node);
 	struct kernfs_syscall_ops *scops = root->syscall_ops;
 
@@ -176,8 +176,6 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 		pr_debug("%s: could not get root dentry!\n", __func__);
 		return -ENOMEM;
 	}
-	kernfs_get(info->root->kn);
-	root->d_fsdata = info->root->kn;
 	sb->s_root = root;
 	sb->s_d_op = &kernfs_dops;
 	return 0;
@@ -283,7 +281,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 void kernfs_kill_sb(struct super_block *sb)
 {
 	struct kernfs_super_info *info = kernfs_info(sb);
-	struct kernfs_node *root_kn = sb->s_root->d_fsdata;
 
 	mutex_lock(&kernfs_mutex);
 	list_del(&info->node);
@@ -295,7 +292,6 @@ void kernfs_kill_sb(struct super_block *sb)
 	 */
 	kill_anon_super(sb);
 	kfree(info);
-	kernfs_put(root_kn);
 }
 
 /**
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 1684af4..efcfd7a 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -100,7 +100,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
 
 static int kernfs_getlink(struct dentry *dentry, char *path)
 {
-	struct kernfs_node *kn = dentry->d_fsdata;
+	struct kernfs_node *kn = kernfs_dentry_node(dentry);
 	struct kernfs_node *parent = kn->parent;
 	struct kernfs_node *target = kn->symlink.target_kn;
 	int error;
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 5/5] kernfs: add exportfs operations
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
                   ` (3 preceding siblings ...)
  2017-05-22 22:53 ` [PATCH 4/5] kernfs: don't set dentry->d_fsdata Shaohua Li
@ 2017-05-22 22:53 ` Shaohua Li
  2017-05-23  7:40   ` Christoph Hellwig
  2017-05-23 18:59   ` Tejun Heo
  2017-05-23  7:39 ` [PATCH 0/5] " Christoph Hellwig
  2017-05-23 19:06 ` Tejun Heo
  6 siblings, 2 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-22 22:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: tj, gregkh, viro, Kernel-team

Now we have the facilities to implement exportfs operations.

Signed-off-by: Shaohua Li <shli@fb.com>
---
 fs/kernfs/mount.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 462a40c..5af88cc 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/seq_file.h>
+#include <linux/exportfs.h>
 
 #include "kernfs-internal.h"
 
@@ -64,6 +65,59 @@ const struct super_operations kernfs_sops = {
 	.show_path	= kernfs_sop_show_path,
 };
 
+static struct inode *kernfs_nfs_get_inode(struct super_block *sb,
+		u64 ino, u32 generation)
+{
+	struct kernfs_super_info *info = kernfs_info(sb);
+	struct inode *inode;
+	struct kernfs_node *kn;
+
+	if (ino == 0)
+		return ERR_PTR(-ESTALE);
+
+	kn = kernfs_get_node_by_ino(info->root, ino);
+	if (!kn)
+		return ERR_PTR(-ESTALE);
+	inode = kernfs_get_inode(sb, kn);
+	kernfs_put(kn);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	if (generation && inode->i_generation != generation) {
+		/* we didn't find the right inode.. */
+		iput(inode);
+		return ERR_PTR(-ESTALE);
+	}
+	return inode;
+}
+
+static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    kernfs_nfs_get_inode);
+}
+
+static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid,
+		int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    kernfs_nfs_get_inode);
+}
+
+static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
+{
+	struct kernfs_node *kn = kernfs_dentry_node(child);
+
+	return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent));
+}
+
+static const struct export_operations kernfs_export_ops = {
+	.fh_to_dentry = kernfs_fh_to_dentry,
+	.fh_to_parent = kernfs_fh_to_parent,
+	.get_parent = kernfs_get_parent_dentry,
+};
+
 /**
  * kernfs_root_from_sb - determine kernfs_root associated with a super_block
  * @sb: the super_block in question
@@ -159,6 +213,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 	sb->s_magic = magic;
 	sb->s_op = &kernfs_sops;
 	sb->s_xattr = kernfs_xattr_handlers;
+	sb->s_export_op = &kernfs_export_ops;
 	sb->s_time_gran = 1;
 
 	/* get root inode, initialize and unlock it */
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/5] kernfs: add exportfs operations
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
                   ` (4 preceding siblings ...)
  2017-05-22 22:53 ` [PATCH 5/5] kernfs: add exportfs operations Shaohua Li
@ 2017-05-23  7:39 ` Christoph Hellwig
  2017-05-23 15:13   ` Shaohua Li
  2017-05-23 19:06 ` Tejun Heo
  6 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:39 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, tj, gregkh, viro, Kernel-team

On Mon, May 22, 2017 at 03:53:04PM -0700, Shaohua Li wrote:
> Hi,
> 
> The goal isn't to export kernfs to NFS. The intention is to make tracing cgroup
> aware. To do this, tracing will record an id for cgroup and use the id to find
> cgroup name later. The best id is the cgroup directory inode number. Further to
> filter out stale cgroup directory, fhandle is the best to identify a cgroup. So
> this is what this series try to do.

Eww.  Even if you need to maintain i_generation for that please don't
add the full export_operations.  People will just get stupid ideas based
on that.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-22 22:53 ` [PATCH 5/5] kernfs: add exportfs operations Shaohua Li
@ 2017-05-23  7:40   ` Christoph Hellwig
  2017-05-23 18:57     ` Tejun Heo
  2017-05-23 18:59   ` Tejun Heo
  1 sibling, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:40 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, tj, gregkh, viro, Kernel-team

On Mon, May 22, 2017 at 03:53:09PM -0700, Shaohua Li wrote:
> Now we have the facilities to implement exportfs operations.

But do we have a use case?  I'd rather avoid this..

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/5] kernfs: implement i_generation
  2017-05-22 22:53 ` [PATCH 1/5] kernfs: implement i_generation Shaohua Li
@ 2017-05-23  7:41   ` Christoph Hellwig
  2017-05-23 15:09     ` Shaohua Li
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-23  7:41 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, tj, gregkh, viro, Kernel-team

On Mon, May 22, 2017 at 03:53:05PM -0700, Shaohua Li wrote:
> Set i_generation for kernfs inod. This is required to implement exportfs
> operations.
> 
> Signed-off-by: Shaohua Li <shli@fb.com>
> ---
>  fs/kernfs/dir.c        | 2 ++
>  fs/kernfs/inode.c      | 1 +
>  include/linux/kernfs.h | 2 ++
>  3 files changed, 5 insertions(+)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index db5900aaa..09d093e 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -634,6 +634,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
>  	if (ret < 0)
>  		goto err_out2;
>  	kn->ino = ret;
> +	kn->generation = atomic_inc_return(&root->next_generation);

i_generation is only supposed to be valid on a per-inode basis, so this
global counter seems really odd.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/5] kernfs: implement i_generation
  2017-05-23  7:41   ` Christoph Hellwig
@ 2017-05-23 15:09     ` Shaohua Li
  2017-05-23 19:20       ` Tejun Heo
  2017-05-24 17:40       ` Christoph Hellwig
  0 siblings, 2 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-23 15:09 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, tj, gregkh, viro, Kernel-team

On Tue, May 23, 2017 at 12:41:12AM -0700, Christoph Hellwig wrote:
> On Mon, May 22, 2017 at 03:53:05PM -0700, Shaohua Li wrote:
> > Set i_generation for kernfs inod. This is required to implement exportfs
> > operations.
> > 
> > Signed-off-by: Shaohua Li <shli@fb.com>
> > ---
> >  fs/kernfs/dir.c        | 2 ++
> >  fs/kernfs/inode.c      | 1 +
> >  include/linux/kernfs.h | 2 ++
> >  3 files changed, 5 insertions(+)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index db5900aaa..09d093e 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -634,6 +634,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
> >  	if (ret < 0)
> >  		goto err_out2;
> >  	kn->ino = ret;
> > +	kn->generation = atomic_inc_return(&root->next_generation);
> 
> i_generation is only supposed to be valid on a per-inode basis, so this
> global counter seems really odd.

What's the difference between per-inode or per-super? The i_generation doesn't
need to be consecutive for an inode. I checked other fs, a lot of filesystems
implement i_generation in this way, for example, f2fs, ext4.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/5] kernfs: add exportfs operations
  2017-05-23  7:39 ` [PATCH 0/5] " Christoph Hellwig
@ 2017-05-23 15:13   ` Shaohua Li
  0 siblings, 0 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-23 15:13 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-kernel, tj, gregkh, viro, Kernel-team

On Tue, May 23, 2017 at 12:39:41AM -0700, Christoph Hellwig wrote:
> On Mon, May 22, 2017 at 03:53:04PM -0700, Shaohua Li wrote:
> > Hi,
> > 
> > The goal isn't to export kernfs to NFS. The intention is to make tracing cgroup
> > aware. To do this, tracing will record an id for cgroup and use the id to find
> > cgroup name later. The best id is the cgroup directory inode number. Further to
> > filter out stale cgroup directory, fhandle is the best to identify a cgroup. So
> > this is what this series try to do.
> 
> Eww.  Even if you need to maintain i_generation for that please don't
> add the full export_operations.  People will just get stupid ideas based
> on that.

Hi Christoph,
what did you mean 'don't add the full export_operations'? I'd love to change
this if there is better identification for a cgroup other than fhandle.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 4/5] kernfs: don't set dentry->d_fsdata
  2017-05-22 22:53 ` [PATCH 4/5] kernfs: don't set dentry->d_fsdata Shaohua Li
@ 2017-05-23 18:37   ` Tejun Heo
  0 siblings, 0 replies; 22+ messages in thread
From: Tejun Heo @ 2017-05-23 18:37 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, gregkh, viro, Kernel-team

Hello,

On Mon, May 22, 2017 at 03:53:08PM -0700, Shaohua Li wrote:
> +#define kernfs_dentry_node(d)  ((d_inode(d))->i_private)

This would oops on a negative dentry.  We need to test d_inode(d) for
NULL first.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-23  7:40   ` Christoph Hellwig
@ 2017-05-23 18:57     ` Tejun Heo
  2017-05-24 17:38       ` Christoph Hellwig
  0 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2017-05-23 18:57 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel, gregkh, viro, Kernel-team

Hello, Christoph.

On Tue, May 23, 2017 at 12:40:19AM -0700, Christoph Hellwig wrote:
> On Mon, May 22, 2017 at 03:53:09PM -0700, Shaohua Li wrote:
> > Now we have the facilities to implement exportfs operations.
> 
> But do we have a use case?  I'd rather avoid this..

Yeah, this is one of the repeatedly requested features - a cgroup id
which can be looked up in a scalable way.  We probably should have
added this earlier too as we already have places where we're passing
in full cgroup path into the kernel (ipt_cgroup match).

IIUC, Shaohua is adding it so that block tracing can be made aware of
cgroups but something like this is necessary whenever we try to refer
to a cgroup in a race-free / scalable way.

If you have any other ideas, I'm all ears.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-22 22:53 ` [PATCH 5/5] kernfs: add exportfs operations Shaohua Li
  2017-05-23  7:40   ` Christoph Hellwig
@ 2017-05-23 18:59   ` Tejun Heo
  1 sibling, 0 replies; 22+ messages in thread
From: Tejun Heo @ 2017-05-23 18:59 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, gregkh, viro, Kernel-team

On Mon, May 22, 2017 at 03:53:09PM -0700, Shaohua Li wrote:
> +static struct inode *kernfs_nfs_get_inode(struct super_block *sb,
> +		u64 ino, u32 generation)

Heh, do we have to name this kernfs_nfs_get_inode()?  I think it'd be
nice to give it a different name and add a comment explaining the goal
of the interface.

Also, I think it probably would be better to make this an optional
feature of kernfs.  sysfs has no need to provide something like this
yet.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 0/5] kernfs: add exportfs operations
  2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
                   ` (5 preceding siblings ...)
  2017-05-23  7:39 ` [PATCH 0/5] " Christoph Hellwig
@ 2017-05-23 19:06 ` Tejun Heo
  6 siblings, 0 replies; 22+ messages in thread
From: Tejun Heo @ 2017-05-23 19:06 UTC (permalink / raw)
  To: Shaohua Li; +Cc: linux-kernel, gregkh, viro, Kernel-team

Hello,

On Mon, May 22, 2017 at 03:53:04PM -0700, Shaohua Li wrote:
> The goal isn't to export kernfs to NFS. The intention is to make tracing cgroup
> aware. To do this, tracing will record an id for cgroup and use the id to find
> cgroup name later. The best id is the cgroup directory inode number. Further to
> filter out stale cgroup directory, fhandle is the best to identify a cgroup. So
> this is what this series try to do.

Generally looks good to me.  Had some review points in other replies.
There is a related issue which can be a nice follow-up - replacing
cgrp->id with kernfs's fid.  cgrp->id is allocated on its own idr and
its only use is to accelerate testing for ancestry and we can easily
replace that with the unique file id.

Thanks a lot!

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/5] kernfs: implement i_generation
  2017-05-23 15:09     ` Shaohua Li
@ 2017-05-23 19:20       ` Tejun Heo
  2017-05-24 17:40       ` Christoph Hellwig
  1 sibling, 0 replies; 22+ messages in thread
From: Tejun Heo @ 2017-05-23 19:20 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Christoph Hellwig, linux-kernel, gregkh, viro, Kernel-team

Hello,

On Tue, May 23, 2017 at 08:09:48AM -0700, Shaohua Li wrote:
> > i_generation is only supposed to be valid on a per-inode basis, so this
> > global counter seems really odd.
> 
> What's the difference between per-inode or per-super? The i_generation doesn't
> need to be consecutive for an inode. I checked other fs, a lot of filesystems
> implement i_generation in this way, for example, f2fs, ext4.

One worry is that it is reasonably possible to wrap on the generation
number.  Some setups go through cgroups really fast and wrapping 2^32
on the number of files ever created isn't that difficult over time.
This actually showing up as a malfunction would be very low probabilty
but is still nasty.

It'd be nice to give out unique 64bit id for each kn but then we can't
use idr.  rbtree or extensible hash should work, right?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-23 18:57     ` Tejun Heo
@ 2017-05-24 17:38       ` Christoph Hellwig
  2017-05-24 17:39         ` Tejun Heo
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-24 17:38 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christoph Hellwig, Shaohua Li, linux-kernel, gregkh, viro, Kernel-team

On Tue, May 23, 2017 at 02:57:11PM -0400, Tejun Heo wrote:
> 
> Yeah, this is one of the repeatedly requested features - a cgroup id
> which can be looked up in a scalable way.  We probably should have
> added this earlier too as we already have places where we're passing
> in full cgroup path into the kernel (ipt_cgroup match).

I still have no idea how this ties into export ops.  Currently I don't
see any tracing code using export operations, so maybe a big part of
the series is missing/

> IIUC, Shaohua is adding it so that block tracing can be made aware of
> cgroups but something like this is necessary whenever we try to refer
> to a cgroup in a race-free / scalable way.

You can already get i_ino using statx, and i_generation using
FS_IOC_GETVERSION, so I'm a little lost on why you need the full
exportfs infrastructure.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-24 17:38       ` Christoph Hellwig
@ 2017-05-24 17:39         ` Tejun Heo
  2017-05-24 17:41           ` Christoph Hellwig
  0 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2017-05-24 17:39 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel, gregkh, viro, Kernel-team

Hello, Christoph.

On Wed, May 24, 2017 at 10:38:22AM -0700, Christoph Hellwig wrote:
> On Tue, May 23, 2017 at 02:57:11PM -0400, Tejun Heo wrote:
> > 
> > Yeah, this is one of the repeatedly requested features - a cgroup id
> > which can be looked up in a scalable way.  We probably should have
> > added this earlier too as we already have places where we're passing
> > in full cgroup path into the kernel (ipt_cgroup match).
> 
> I still have no idea how this ties into export ops.  Currently I don't
> see any tracing code using export operations, so maybe a big part of
> the series is missing/

Oh yeah, it's not adding any users yet.

> > IIUC, Shaohua is adding it so that block tracing can be made aware of
> > cgroups but something like this is necessary whenever we try to refer
> > to a cgroup in a race-free / scalable way.
> 
> You can already get i_ino using statx, and i_generation using
> FS_IOC_GETVERSION, so I'm a little lost on why you need the full
> exportfs infrastructure.

But how do you map that back to the cgroup without scanning the cgroup
hierarchy?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 1/5] kernfs: implement i_generation
  2017-05-23 15:09     ` Shaohua Li
  2017-05-23 19:20       ` Tejun Heo
@ 2017-05-24 17:40       ` Christoph Hellwig
  1 sibling, 0 replies; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-24 17:40 UTC (permalink / raw)
  To: Shaohua Li; +Cc: Christoph Hellwig, linux-kernel, tj, gregkh, viro, Kernel-team

On Tue, May 23, 2017 at 08:09:48AM -0700, Shaohua Li wrote:
> > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > > index db5900aaa..09d093e 100644
> > > --- a/fs/kernfs/dir.c
> > > +++ b/fs/kernfs/dir.c
> > > @@ -634,6 +634,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
> > >  	if (ret < 0)
> > >  		goto err_out2;
> > >  	kn->ino = ret;
> > > +	kn->generation = atomic_inc_return(&root->next_generation);
> > 
> > i_generation is only supposed to be valid on a per-inode basis, so this
> > global counter seems really odd.
> 
> What's the difference between per-inode or per-super? The i_generation doesn't
> need to be consecutive for an inode. I checked other fs, a lot of filesystems
> implement i_generation in this way, for example, f2fs, ext4.

of course per-sb is a valid implementation, but it seems like
introducing an easily avoidable bottleneck by serializing on a per-sb
cacheline for each file creation.  But then again it seems like kernfs
already has various other per-sb contention points, so maybe it's not
an issue in the end.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-24 17:39         ` Tejun Heo
@ 2017-05-24 17:41           ` Christoph Hellwig
  2017-05-24 17:46             ` Tejun Heo
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2017-05-24 17:41 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christoph Hellwig, Shaohua Li, linux-kernel, gregkh, viro, Kernel-team

On Wed, May 24, 2017 at 01:39:39PM -0400, Tejun Heo wrote:
> > You can already get i_ino using statx, and i_generation using
> > FS_IOC_GETVERSION, so I'm a little lost on why you need the full
> > exportfs infrastructure.
> 
> But how do you map that back to the cgroup without scanning the cgroup
> hierarchy?

I'm totally lost on why you would do that.  So maybe you just need
to send the full patch so that reviewers get the full picture.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-24 17:41           ` Christoph Hellwig
@ 2017-05-24 17:46             ` Tejun Heo
  2017-05-24 18:01               ` Shaohua Li
  0 siblings, 1 reply; 22+ messages in thread
From: Tejun Heo @ 2017-05-24 17:46 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Shaohua Li, linux-kernel, gregkh, viro, Kernel-team

Hello, Christoph.

On Wed, May 24, 2017 at 10:41:38AM -0700, Christoph Hellwig wrote:
> > But how do you map that back to the cgroup without scanning the cgroup
> > hierarchy?
> 
> I'm totally lost on why you would do that.  So maybe you just need
> to send the full patch so that reviewers get the full picture.

Here's a simple scenario.  Let's say blktrace now exposes the cgroup
inode and generation numbers per trace.  Userland tool now wants to
show that in a human readable format but it can only map back the
inode and generation numbers to the path by scanning the cgroup tree.
So, the goal is having a token which is not path which uniquely
identifies a cgroup and the ability to map that back to cgroup path.
We can add a dedicated interface to cgroup root, for example, and
allow querying by echoing inode and generation numbers into it but
that's kinda clumsy.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 5/5] kernfs: add exportfs operations
  2017-05-24 17:46             ` Tejun Heo
@ 2017-05-24 18:01               ` Shaohua Li
  0 siblings, 0 replies; 22+ messages in thread
From: Shaohua Li @ 2017-05-24 18:01 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Christoph Hellwig, linux-kernel, gregkh, viro, Kernel-team

On Wed, May 24, 2017 at 01:46:04PM -0400, Tejun Heo wrote:
> Hello, Christoph.
> 
> On Wed, May 24, 2017 at 10:41:38AM -0700, Christoph Hellwig wrote:
> > > But how do you map that back to the cgroup without scanning the cgroup
> > > hierarchy?
> > 
> > I'm totally lost on why you would do that.  So maybe you just need
> > to send the full patch so that reviewers get the full picture.
> 
> Here's a simple scenario.  Let's say blktrace now exposes the cgroup
> inode and generation numbers per trace.  Userland tool now wants to
> show that in a human readable format but it can only map back the
> inode and generation numbers to the path by scanning the cgroup tree.
> So, the goal is having a token which is not path which uniquely
> identifies a cgroup and the ability to map that back to cgroup path.
> We can add a dedicated interface to cgroup root, for example, and
> allow querying by echoing inode and generation numbers into it but
> that's kinda clumsy.

blktrace is the target I'm working on, because we can't use pid to determine
where a bio comes from, so we show an ID. Userspace tool can use the ID to find
the cgroup path. Or an BPF program can use the ID to filter trace. Regardless
how we implement the ID, it should be something like an inode number and
generation number. And since we already had existing syscall to map between
fhandle and path, I choose the exportfs operations. I'll post a full patch set
later.

Thanks,
Shaohua

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2017-05-24 18:01 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-22 22:53 [PATCH 0/5] kernfs: add exportfs operations Shaohua Li
2017-05-22 22:53 ` [PATCH 1/5] kernfs: implement i_generation Shaohua Li
2017-05-23  7:41   ` Christoph Hellwig
2017-05-23 15:09     ` Shaohua Li
2017-05-23 19:20       ` Tejun Heo
2017-05-24 17:40       ` Christoph Hellwig
2017-05-22 22:53 ` [PATCH 2/5] kernfs: use idr instead of ida to manage inode number Shaohua Li
2017-05-22 22:53 ` [PATCH 3/5] kernfs: add an API to get kernfs node from " Shaohua Li
2017-05-22 22:53 ` [PATCH 4/5] kernfs: don't set dentry->d_fsdata Shaohua Li
2017-05-23 18:37   ` Tejun Heo
2017-05-22 22:53 ` [PATCH 5/5] kernfs: add exportfs operations Shaohua Li
2017-05-23  7:40   ` Christoph Hellwig
2017-05-23 18:57     ` Tejun Heo
2017-05-24 17:38       ` Christoph Hellwig
2017-05-24 17:39         ` Tejun Heo
2017-05-24 17:41           ` Christoph Hellwig
2017-05-24 17:46             ` Tejun Heo
2017-05-24 18:01               ` Shaohua Li
2017-05-23 18:59   ` Tejun Heo
2017-05-23  7:39 ` [PATCH 0/5] " Christoph Hellwig
2017-05-23 15:13   ` Shaohua Li
2017-05-23 19:06 ` Tejun Heo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).