All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yi Tao <escape@linux.alibaba.com>
To: gregkh@linuxfoundation.org, tj@kernel.org,
	lizefan.x@bytedance.com, hannes@cmpxchg.org, mcgrof@kernel.org,
	keescook@chromium.org, yzaikin@google.com
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, shanpeic@linux.alibaba.com
Subject: [RFC PATCH 1/2] add pinned flags for kernfs node
Date: Wed,  8 Sep 2021 20:15:12 +0800	[thread overview]
Message-ID: <e753e449240bfc43fcb7aa26dca196e2f51e0836.1631102579.git.escape@linux.alibaba.com> (raw)
In-Reply-To: <cover.1631102579.git.escape@linux.alibaba.com>
In-Reply-To: <cover.1631102579.git.escape@linux.alibaba.com>

This patch is preparing for the implementation of cgroup pool. If a
kernfs node is set to pinned. the data of this node will no longer be
protected by kernfs internally. When it performs the following actions,
the area protected by kernfs_rwsem will be protected by the specific
spinlock:
	1.rename this node
	2.remove this node
	3.create child node

Suggested-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Signed-off-by: Yi Tao <escape@linux.alibaba.com>
---
 fs/kernfs/dir.c        | 74 ++++++++++++++++++++++++++++++++++++--------------
 include/linux/kernfs.h | 14 ++++++++++
 2 files changed, 68 insertions(+), 20 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index ba581429bf7b..68b05b5bc1a2 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -26,7 +26,6 @@
 
 static bool kernfs_active(struct kernfs_node *kn)
 {
-	lockdep_assert_held(&kernfs_rwsem);
 	return atomic_read(&kn->active) >= 0;
 }
 
@@ -461,10 +460,9 @@ static void kernfs_drain(struct kernfs_node *kn)
 {
 	struct kernfs_root *root = kernfs_root(kn);
 
-	lockdep_assert_held_write(&kernfs_rwsem);
 	WARN_ON_ONCE(kernfs_active(kn));
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(kn);
 
 	if (kernfs_lockdep(kn)) {
 		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
@@ -483,7 +481,7 @@ static void kernfs_drain(struct kernfs_node *kn)
 
 	kernfs_drain_open_files(kn);
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(kn);
 }
 
 /**
@@ -722,7 +720,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 	bool has_ns;
 	int ret;
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(parent);
 
 	ret = -EINVAL;
 	has_ns = kernfs_ns_enabled(parent);
@@ -753,7 +751,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 		ps_iattr->ia_mtime = ps_iattr->ia_ctime;
 	}
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 
 	/*
 	 * Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -767,7 +765,7 @@ int kernfs_add_one(struct kernfs_node *kn)
 	return 0;
 
 out_unlock:
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 	return ret;
 }
 
@@ -788,8 +786,6 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 	bool has_ns = kernfs_ns_enabled(parent);
 	unsigned int hash;
 
-	lockdep_assert_held(&kernfs_rwsem);
-
 	if (has_ns != (bool)ns) {
 		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 		     has_ns ? "required" : "invalid", parent->name, name);
@@ -1242,8 +1238,6 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 {
 	struct rb_node *rbn;
 
-	lockdep_assert_held_write(&kernfs_rwsem);
-
 	/* if first iteration, visit leftmost descendant which may be root */
 	if (!pos)
 		return kernfs_leftmost_descendant(root);
@@ -1299,8 +1293,6 @@ static void __kernfs_remove(struct kernfs_node *kn)
 {
 	struct kernfs_node *pos;
 
-	lockdep_assert_held_write(&kernfs_rwsem);
-
 	/*
 	 * Short-circuit if non-root @kn has already finished removal.
 	 * This is for kernfs_remove_self() which plays with active ref
@@ -1369,9 +1361,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
  */
 void kernfs_remove(struct kernfs_node *kn)
 {
-	down_write(&kernfs_rwsem);
+	kernfs_lock(kn);
 	__kernfs_remove(kn);
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(kn);
 }
 
 /**
@@ -1525,13 +1517,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
 		return -ENOENT;
 	}
 
-	down_write(&kernfs_rwsem);
+	kernfs_lock(parent);
 
 	kn = kernfs_find_ns(parent, name, ns);
 	if (kn)
 		__kernfs_remove(kn);
 
-	up_write(&kernfs_rwsem);
+	kernfs_unlock(parent);
 
 	if (kn)
 		return 0;
@@ -1557,7 +1549,9 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	if (!kn->parent)
 		return -EINVAL;
 
-	down_write(&kernfs_rwsem);
+	/* if parent is pinned, parent->lock protects rename */
+	if (!kn->parent->pinned)
+		down_write(&kernfs_rwsem);
 
 	error = -ENOENT;
 	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
@@ -1576,7 +1570,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	/* rename kernfs_node */
 	if (strcmp(kn->name, new_name) != 0) {
 		error = -ENOMEM;
-		new_name = kstrdup_const(new_name, GFP_KERNEL);
+		/* use GFP_ATOMIC to avoid sleep */
+		new_name = kstrdup_const(new_name, GFP_ATOMIC);
 		if (!new_name)
 			goto out;
 	} else {
@@ -1611,10 +1606,49 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 
 	error = 0;
  out:
-	up_write(&kernfs_rwsem);
+	if (!kn->parent->pinned)
+		up_write(&kernfs_rwsem);
 	return error;
 }
 
+/* Traverse all descendants and set pinned */
+void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock)
+{
+	struct kernfs_node *pos = NULL;
+
+	while ((pos = kernfs_next_descendant_post(pos, kn))) {
+		pos->pinned = true;
+		pos->lock = lock;
+	}
+}
+
+/* Traverse all descendants and clear pinned */
+void kernfs_clear_pinned(struct kernfs_node *kn)
+{
+	struct kernfs_node *pos = NULL;
+
+	while ((pos = kernfs_next_descendant_post(pos, kn))) {
+		pos->pinned = false;
+		pos->lock = NULL;
+	}
+}
+
+void kernfs_lock(struct kernfs_node *kn)
+{
+	if (!kn->pinned)
+		down_write(&kernfs_rwsem);
+	else
+		spin_lock(kn->lock);
+}
+
+void kernfs_unlock(struct kernfs_node *kn)
+{
+	if (!kn->pinned)
+		up_write(&kernfs_rwsem);
+	else
+		spin_unlock(kn->lock);
+}
+
 /* Relationship between mode and the DT_xxx types */
 static inline unsigned char dt_type(struct kernfs_node *kn)
 {
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 1093abf7c28c..a70d96308c51 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -161,6 +161,13 @@ struct kernfs_node {
 	unsigned short		flags;
 	umode_t			mode;
 	struct kernfs_iattrs	*iattr;
+
+	/*
+	 * If pinned is true, use lock to protect remove, rename this kernfs
+	 * node or create child kernfs node.
+	 */
+	bool			pinned;
+	spinlock_t		*lock;
 };
 
 /*
@@ -415,6 +422,11 @@ int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
 
 struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
 						   u64 id);
+
+void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock);
+void kernfs_clear_pinned(struct kernfs_node *kn);
+void kernfs_lock(struct kernfs_node *kn);
+void kernfs_unlock(struct kernfs_node *kn);
 #else	/* CONFIG_KERNFS */
 
 static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
@@ -528,6 +540,8 @@ static inline void kernfs_kill_sb(struct super_block *sb) { }
 
 static inline void kernfs_init(void) { }
 
+inline void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock) {}
+inline void kernfs_clear_pinned(struct kernfs_node *kn) {}
 #endif	/* CONFIG_KERNFS */
 
 /**
-- 
1.8.3.1


  reply	other threads:[~2021-09-08 12:15 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-08 12:15 [RFC PATCH 0/2] support cgroup pool in v1 Yi Tao
2021-09-08 12:15 ` Yi Tao
2021-09-08 12:15 ` Yi Tao [this message]
2021-09-08 12:15   ` [RFC PATCH 2/2] " Yi Tao
2021-09-08 12:15     ` Yi Tao
2021-09-08 12:35     ` Greg KH
2021-09-08 12:35       ` Greg KH
     [not found]       ` <084930d2-057a-04a7-76d1-b2a7bd37deb0@linux.alibaba.com>
2021-09-09 13:27         ` Greg KH
2021-09-10  2:20           ` taoyi.ty
2021-09-10  2:15       ` taoyi.ty
2021-09-10  2:15         ` taoyi.ty
2021-09-10  6:01         ` Greg KH
2021-09-10  6:01           ` Greg KH
2021-09-08 15:30     ` kernel test robot
2021-09-08 16:52     ` kernel test robot
2021-09-08 17:39     ` kernel test robot
2021-09-08 17:39     ` [RFC PATCH] cgroup_pool_mutex can be static kernel test robot
2021-09-08 12:35   ` [RFC PATCH 1/2] add pinned flags for kernfs node Greg KH
2021-09-08 12:35     ` Greg KH
2021-09-10  2:14     ` taoyi.ty
2021-09-10  6:00       ` Greg KH
2021-09-10  6:00         ` Greg KH
2021-09-08 16:26   ` kernel test robot
2021-09-08 12:37 ` [RFC PATCH 0/2] support cgroup pool in v1 Greg KH
2021-09-10  2:11   ` taoyi.ty
2021-09-10  6:01     ` Greg KH
2021-09-10  6:01       ` Greg KH
2021-09-10 16:49     ` Tejun Heo
2021-09-10 16:49       ` Tejun Heo
2021-09-13 14:20       ` Christian Brauner
2021-09-13 14:20         ` Christian Brauner
2021-09-13 16:24         ` Tejun Heo
2021-09-13 16:24           ` Tejun Heo
2021-09-08 16:35 ` Tejun Heo
2021-09-10  2:12   ` taoyi.ty

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e753e449240bfc43fcb7aa26dca196e2f51e0836.1631102579.git.escape@linux.alibaba.com \
    --to=escape@linux.alibaba.com \
    --cc=cgroups@vger.kernel.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan.x@bytedance.com \
    --cc=mcgrof@kernel.org \
    --cc=shanpeic@linux.alibaba.com \
    --cc=tj@kernel.org \
    --cc=yzaikin@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.