* [PATCH v5 1/2] kernfs: use hashed mutex and spinlock in place of global ones.
2022-02-06 1:09 [PATCH v5 0/2] kernfs: use hashed mutex and spinlock in place of global ones Imran Khan
@ 2022-02-06 1:09 ` Imran Khan
2022-02-08 11:27 ` Greg KH
2022-02-08 17:22 ` Tejun Heo
2022-02-06 1:09 ` [PATCH v5 2/2] kernfs: Replace per-fs global rwsem with per-fs hashed rwsem Imran Khan
1 sibling, 2 replies; 10+ messages in thread
From: Imran Khan @ 2022-02-06 1:09 UTC (permalink / raw)
To: tj, gregkh; +Cc: linux-kernel
Right now a global mutex (kernfs_open_file_mutex) protects list of
kernfs_open_file instances corresponding to a sysfs attribute. So even
if different tasks are opening or closing different sysfs files they
can contend on the osq_lock of this mutex. The contention is more apparent
in large-scale systems with a few hundred CPUs, where most of the CPUs have
running tasks that are opening, accessing or closing sysfs files at any
point of time. Since each list of kernfs_open_file belongs to a
kernfs_open_node instance, which in turn corresponds to one kernfs_node,
moving the global kernfs_open_file_mutex into kernfs_node would appear to
fix this contention, but it has the unwanted side effect of bloating up
kernfs_node size and hence kobject memory usage.
Also, since kernfs_node->attr.open points to the kernfs_open_node instance
corresponding to the kernfs_node, we could use a kernfs_node specific
spinlock in place of the current global spinlock, i.e. kernfs_open_node_lock.
But this approach would increase kobject memory usage as well.
Use per-fs hashed locks in place of above mentioned global locks to reduce
kernfs access contention without increasing kobject memory usage.
Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
---
fs/kernfs/dir.c | 5 +++
fs/kernfs/file.c | 61 ++++++++++++++++---------------------
fs/kernfs/kernfs-internal.h | 51 +++++++++++++++++++++++++++++++
include/linux/kernfs.h | 39 ++++++++++++++++++++++++
4 files changed, 122 insertions(+), 34 deletions(-)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index e6d9772ddb4ca..d26fb3bffda92 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -909,6 +909,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
{
struct kernfs_root *root;
struct kernfs_node *kn;
+ int lock_count;
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root)
@@ -916,6 +917,10 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
idr_init(&root->ino_idr);
init_rwsem(&root->kernfs_rwsem);
+ for (lock_count = 0; lock_count < NR_KERNFS_LOCKS; lock_count++) {
+ spin_lock_init(&root->open_node_locks[lock_count].lock);
+ mutex_init(&root->open_file_mutex[lock_count].lock);
+ }
INIT_LIST_HEAD(&root->supers);
/*
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 9414a7a60a9f4..018d038b72fdd 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -18,20 +18,6 @@
#include "kernfs-internal.h"
-/*
- * There's one kernfs_open_file for each open file and one kernfs_open_node
- * for each kernfs_node with one or more open files.
- *
- * kernfs_node->attr.open points to kernfs_open_node. attr.open is
- * protected by kernfs_open_node_lock.
- *
- * filp->private_data points to seq_file whose ->private points to
- * kernfs_open_file. kernfs_open_files are chained at
- * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
- */
-static DEFINE_SPINLOCK(kernfs_open_node_lock);
-static DEFINE_MUTEX(kernfs_open_file_mutex);
-
struct kernfs_open_node {
atomic_t refcnt;
atomic_t event;
@@ -524,10 +510,11 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on, *new_on = NULL;
-
+ struct mutex *mutex = NULL;
+ spinlock_t *lock = NULL;
retry:
- mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irq(&kernfs_open_node_lock);
+ mutex = kernfs_open_file_mutex_lock(kn);
+ lock = kernfs_open_node_lock(kn);
if (!kn->attr.open && new_on) {
kn->attr.open = new_on;
@@ -540,8 +527,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
list_add_tail(&of->list, &on->files);
}
- spin_unlock_irq(&kernfs_open_node_lock);
- mutex_unlock(&kernfs_open_file_mutex);
+ spin_unlock_irq(lock);
+ mutex_unlock(mutex);
if (on) {
kfree(new_on);
@@ -575,10 +562,14 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
struct kernfs_open_node *on = kn->attr.open;
+ struct mutex *mutex = NULL;
+ spinlock_t *lock = NULL;
unsigned long flags;
- mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ mutex = kernfs_open_file_mutex_lock(kn);
+ lock = kernfs_open_node_lock_ptr(kn);
+
+ spin_lock_irqsave(lock, flags);
if (of)
list_del(&of->list);
@@ -588,8 +579,8 @@ static void kernfs_put_open_node(struct kernfs_node *kn,
else
on = NULL;
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
- mutex_unlock(&kernfs_open_file_mutex);
+ spin_unlock_irqrestore(lock, flags);
+ mutex_unlock(mutex);
kfree(on);
}
@@ -729,11 +720,11 @@ static void kernfs_release_file(struct kernfs_node *kn,
/*
* @of is guaranteed to have no other file operations in flight and
* we just want to synchronize release and drain paths.
- * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * @open_file_mutex is enough. @of->mutex can't be used
* here because drain path may be called from places which can
* cause circular dependency.
*/
- lockdep_assert_held(&kernfs_open_file_mutex);
+ lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
if (!of->released) {
/*
@@ -750,11 +741,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = inode->i_private;
struct kernfs_open_file *of = kernfs_of(filp);
+ struct mutex *lock = NULL;
if (kn->flags & KERNFS_HAS_RELEASE) {
- mutex_lock(&kernfs_open_file_mutex);
+ lock = kernfs_open_file_mutex_lock(kn);
kernfs_release_file(kn, of);
- mutex_unlock(&kernfs_open_file_mutex);
+ mutex_unlock(lock);
}
kernfs_put_open_node(kn, of);
@@ -769,19 +761,21 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
+ struct mutex *mutex = NULL;
+ spinlock_t *lock = NULL;
if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
- spin_lock_irq(&kernfs_open_node_lock);
+ lock = kernfs_open_node_lock(kn);
on = kn->attr.open;
if (on)
atomic_inc(&on->refcnt);
- spin_unlock_irq(&kernfs_open_node_lock);
+ spin_unlock_irq(lock);
if (!on)
return;
- mutex_lock(&kernfs_open_file_mutex);
+ mutex = kernfs_open_file_mutex_lock(kn);
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
@@ -793,8 +787,7 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
kernfs_release_file(kn, of);
}
- mutex_unlock(&kernfs_open_file_mutex);
-
+ mutex_unlock(mutex);
kernfs_put_open_node(kn, NULL);
}
@@ -922,13 +915,13 @@ void kernfs_notify(struct kernfs_node *kn)
return;
/* kick poll immediately */
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+ spin_lock_irqsave(kernfs_open_node_lock_ptr(kn), flags);
on = kn->attr.open;
if (on) {
atomic_inc(&on->event);
wake_up_interruptible(&on->poll);
}
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ spin_unlock_irqrestore(kernfs_open_node_lock_ptr(kn), flags);
/* schedule work to kick fsnotify */
spin_lock_irqsave(&kernfs_notify_lock, flags);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index f9cc912c31e1b..cc49a6cd94154 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -31,6 +31,7 @@ struct kernfs_iattrs {
atomic_t user_xattr_size;
};
+
/* +1 to avoid triggering overflow warning when negating it */
#define KN_DEACTIVATED_BIAS (INT_MIN + 1)
@@ -147,4 +148,54 @@ void kernfs_drain_open_files(struct kernfs_node *kn);
*/
extern const struct inode_operations kernfs_symlink_iops;
+static inline spinlock_t *kernfs_open_node_lock_ptr(struct kernfs_node *kn)
+{
+ struct kernfs_root *root;
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ root = kernfs_root(kn);
+
+ return &root->open_node_locks[idx].lock;
+}
+
+static inline spinlock_t *kernfs_open_node_lock(struct kernfs_node *kn)
+{
+ struct kernfs_root *root;
+ spinlock_t *lock;
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ root = kernfs_root(kn);
+
+ lock = &root->open_node_locks[idx].lock;
+
+ spin_lock_irq(lock);
+
+ return lock;
+}
+
+static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
+{
+ struct kernfs_root *root;
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ root = kernfs_root(kn);
+
+ return &root->open_file_mutex[idx].lock;
+}
+
+static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
+{
+ struct kernfs_root *root;
+ struct mutex *lock;
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ root = kernfs_root(kn);
+
+ lock = &root->open_file_mutex[idx].lock;
+
+ mutex_lock(lock);
+
+ return lock;
+}
+
#endif /* __KERNFS_INTERNAL_H */
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 861c4f0f8a29f..5bf9f02ce9dce 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -18,6 +18,8 @@
#include <linux/uidgid.h>
#include <linux/wait.h>
#include <linux/rwsem.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
struct file;
struct dentry;
@@ -34,6 +36,40 @@ struct kernfs_fs_context;
struct kernfs_open_node;
struct kernfs_iattrs;
+/*
+ * NR_KERNFS_LOCK_BITS determines size (NR_KERNFS_LOCKS) of hash
+ * table of locks.
+ * Having a small hash table would impact scalability, since
+ * more and more kernfs_node objects will end up using same lock
+ * and having a very large hash table would waste memory.
+ *
+ * At the moment size of hash table of locks is being set based on
+ * the number of CPUs as follows:
+ *
+ * NR_CPU NR_KERNFS_LOCK_BITS NR_KERNFS_LOCKS
+ * 1 1 2
+ * 2-3 2 4
+ * 4-7 4 16
+ * 8-15 6 64
+ * 16-31 8 256
+ * 32 and more 10 1024
+ */
+#ifdef CONFIG_SMP
+#define NR_KERNFS_LOCK_BITS (2 * (ilog2(NR_CPUS < 32 ? NR_CPUS : 32)))
+#else
+#define NR_KERNFS_LOCK_BITS 1
+#endif
+
+#define NR_KERNFS_LOCKS (1 << NR_KERNFS_LOCK_BITS)
+
+struct kernfs_open_node_lock {
+ spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct kernfs_open_file_mutex {
+ struct mutex lock;
+} ____cacheline_aligned_in_smp;
+
enum kernfs_node_type {
KERNFS_DIR = 0x0001,
KERNFS_FILE = 0x0002,
@@ -90,6 +126,7 @@ enum kernfs_root_flag {
KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008,
};
+
/* type-specific structures for kernfs_node union members */
struct kernfs_elem_dir {
unsigned long subdirs;
@@ -201,6 +238,8 @@ struct kernfs_root {
wait_queue_head_t deactivate_waitq;
struct rw_semaphore kernfs_rwsem;
+ struct kernfs_open_node_lock open_node_locks[NR_KERNFS_LOCKS];
+ struct kernfs_open_file_mutex open_file_mutex[NR_KERNFS_LOCKS];
};
struct kernfs_open_file {
--
2.30.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v5 2/2] kernfs: Replace per-fs global rwsem with per-fs hashed rwsem.
2022-02-06 1:09 [PATCH v5 0/2] kernfs: use hashed mutex and spinlock in place of global ones Imran Khan
2022-02-06 1:09 ` [PATCH v5 1/2] " Imran Khan
@ 2022-02-06 1:09 ` Imran Khan
2022-02-08 18:26 ` Tejun Heo
1 sibling, 1 reply; 10+ messages in thread
From: Imran Khan @ 2022-02-06 1:09 UTC (permalink / raw)
To: tj, gregkh; +Cc: linux-kernel
Having a single rwsem to synchronize all operations across a kernfs
based file system (cgroup, sysfs etc.) does not scale well. Replace
it with a hashed rwsem to reduce contention around single per-fs
rwsem.
Also introduce a per-fs rwsem to protect the per-fs list of kernfs_super_info.
Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
---
fs/kernfs/dir.c | 276 ++++++++++++++++++++++++------------
fs/kernfs/file.c | 6 +-
fs/kernfs/inode.c | 22 ++-
fs/kernfs/kernfs-internal.h | 112 +++++++++++++++
fs/kernfs/mount.c | 13 +-
fs/kernfs/symlink.c | 5 +-
include/linux/kernfs.h | 5 +-
7 files changed, 323 insertions(+), 116 deletions(-)
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d26fb3bffda92..ec1fff78c25a9 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -25,7 +25,9 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
static bool kernfs_active(struct kernfs_node *kn)
{
- lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem[idx]);
return atomic_read(&kn->active) >= 0;
}
@@ -450,40 +452,42 @@ void kernfs_put_active(struct kernfs_node *kn)
/**
* kernfs_drain - drain kernfs_node
* @kn: kernfs_node to drain
+ * @anc: ancestor of kernfs_node to drain
*
* Drain existing usages and nuke all existing mmaps of @kn. Mutiple
* removers may invoke this function concurrently on @kn and all will
* return after draining is complete.
*/
-static void kernfs_drain(struct kernfs_node *kn)
- __releases(&kernfs_root(kn)->kernfs_rwsem)
- __acquires(&kernfs_root(kn)->kernfs_rwsem)
+static void kernfs_drain(struct kernfs_node *kn, struct kernfs_node *anc)
+ __releases(&kernfs_root(anc)->kernfs_rwsem[a_idx])
+ __acquires(&kernfs_root(anc)->kernfs_rwsem[a_idx])
{
struct kernfs_root *root = kernfs_root(kn);
+ int a_idx = hash_ptr(anc, NR_KERNFS_LOCK_BITS);
- lockdep_assert_held_write(&root->kernfs_rwsem);
- WARN_ON_ONCE(kernfs_active(kn));
+ lockdep_assert_held_write(&root->kernfs_rwsem[a_idx]);
+ WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(anc);
- if (kernfs_lockdep(kn)) {
- rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
- if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
- lock_contended(&kn->dep_map, _RET_IP_);
+ if (kernfs_lockdep(anc)) {
+ rwsem_acquire(&anc->dep_map, 0, 0, _RET_IP_);
+ if (atomic_read(&anc->active) != KN_DEACTIVATED_BIAS)
+ lock_contended(&anc->dep_map, _RET_IP_);
}
/* but everyone should wait for draining */
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
- if (kernfs_lockdep(kn)) {
- lock_acquired(&kn->dep_map, _RET_IP_);
- rwsem_release(&kn->dep_map, _RET_IP_);
+ if (kernfs_lockdep(anc)) {
+ lock_acquired(&anc->dep_map, _RET_IP_);
+ rwsem_release(&anc->dep_map, _RET_IP_);
}
kernfs_drain_open_files(kn);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(anc, LOCK_SELF, 0);
}
/**
@@ -718,12 +722,11 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
int kernfs_add_one(struct kernfs_node *kn)
{
struct kernfs_node *parent = kn->parent;
- struct kernfs_root *root = kernfs_root(parent);
struct kernfs_iattrs *ps_iattr;
bool has_ns;
int ret;
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(parent, LOCK_SELF, 0);
ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
@@ -754,7 +757,7 @@ int kernfs_add_one(struct kernfs_node *kn)
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
}
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(parent);
/*
* Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -768,7 +771,7 @@ int kernfs_add_one(struct kernfs_node *kn)
return 0;
out_unlock:
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(parent);
return ret;
}
@@ -788,8 +791,9 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
struct rb_node *node = parent->dir.children.rb_node;
bool has_ns = kernfs_ns_enabled(parent);
unsigned int hash;
+ int idx = hash_ptr(parent, NR_KERNFS_LOCK_BITS);
- lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);
+ lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem[idx]);
if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
@@ -820,8 +824,9 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
{
size_t len;
char *p, *name;
+ int idx = hash_ptr(parent, NR_KERNFS_LOCK_BITS);
- lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
+ lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem[idx]);
/* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
spin_lock_irq(&kernfs_rename_lock);
@@ -860,12 +865,11 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
const char *name, const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
kn = kernfs_find_ns(parent, name, ns);
kernfs_get(kn);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
return kn;
}
@@ -885,12 +889,11 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
const char *path, const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
kn = kernfs_walk_ns(parent, path, ns);
kernfs_get(kn);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
return kn;
}
@@ -916,11 +919,12 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
return ERR_PTR(-ENOMEM);
idr_init(&root->ino_idr);
- init_rwsem(&root->kernfs_rwsem);
for (lock_count = 0; lock_count < NR_KERNFS_LOCKS; lock_count++) {
spin_lock_init(&root->open_node_locks[lock_count].lock);
mutex_init(&root->open_file_mutex[lock_count].lock);
+ init_rwsem(&root->kernfs_rwsem[lock_count]);
}
+ init_rwsem(&root->supers_rwsem);
INIT_LIST_HEAD(&root->supers);
/*
@@ -1050,7 +1054,6 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
{
struct kernfs_node *kn;
- struct kernfs_root *root;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1066,13 +1069,12 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
parent = kernfs_dentry_node(dentry->d_parent);
if (parent) {
spin_unlock(&dentry->d_lock);
- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
if (kernfs_dir_changed(parent, dentry)) {
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
return 0;
}
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
} else
spin_unlock(&dentry->d_lock);
@@ -1083,8 +1085,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
}
kn = kernfs_dentry_node(dentry);
- root = kernfs_root(kn);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(kn, LOCK_SELF, 0);
/* The kernfs node has been deactivated */
if (!kernfs_active(kn))
@@ -1103,10 +1104,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
kernfs_info(dentry->d_sb)->ns != kn->ns)
goto out_bad;
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(kn);
return 1;
out_bad:
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(kn);
return 0;
}
@@ -1120,28 +1121,30 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
{
struct kernfs_node *parent = dir->i_private;
struct kernfs_node *kn;
- struct kernfs_root *root;
struct inode *inode = NULL;
const void *ns = NULL;
- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
if (kernfs_ns_enabled(parent))
ns = kernfs_info(dir->i_sb)->ns;
kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
+ up_read_kernfs_rwsem(parent);
/* attach dentry and inode */
if (kn) {
/* Inactive nodes are invisible to the VFS so don't
* create a negative.
*/
+ down_read_kernfs_rwsem(kn, LOCK_SELF, 0);
if (!kernfs_active(kn)) {
- up_read(&root->kernfs_rwsem);
+ /* Unlock both node and parent before returning */
+ up_read_kernfs_rwsem(kn);
return NULL;
}
inode = kernfs_get_inode(dir->i_sb, kn);
if (!inode)
inode = ERR_PTR(-ENOMEM);
+ up_read_kernfs_rwsem(kn);
}
/*
* Needed for negative dentry validation.
@@ -1149,9 +1152,10 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
* or transforms from positive dentry in dentry_unlink_inode()
* called from vfs_rmdir().
*/
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
if (!IS_ERR(inode))
kernfs_set_rev(parent, dentry);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
/* instantiate and hash (possibly negative) dentry */
return d_splice_alias(inode, dentry);
@@ -1273,8 +1277,9 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
struct kernfs_node *root)
{
struct rb_node *rbn;
+ int idx = hash_ptr(root, NR_KERNFS_LOCK_BITS);
- lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);
+ lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem[idx]);
/* if first iteration, visit leftmost descendant which may be root */
if (!pos)
@@ -1309,9 +1314,8 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
void kernfs_activate(struct kernfs_node *kn)
{
struct kernfs_node *pos;
- struct kernfs_root *root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
pos = NULL;
while ((pos = kernfs_next_descendant_post(pos, kn))) {
@@ -1325,14 +1329,15 @@ void kernfs_activate(struct kernfs_node *kn)
pos->flags |= KERNFS_ACTIVATED;
}
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
}
static void __kernfs_remove(struct kernfs_node *kn)
{
struct kernfs_node *pos;
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
- lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
+ lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem[idx]);
/*
* Short-circuit if non-root @kn has already finished removal.
@@ -1346,9 +1351,16 @@ static void __kernfs_remove(struct kernfs_node *kn)
/* prevent any new usage under @kn by deactivating all nodes */
pos = NULL;
- while ((pos = kernfs_next_descendant_post(pos, kn)))
+ while ((pos = kernfs_next_descendant_post(pos, kn))) {
+ int n_idx = hash_ptr(pos, NR_KERNFS_LOCK_BITS);
+
+ if (n_idx != idx)
+ down_write_kernfs_rwsem(pos, LOCK_SELF, 1);
if (kernfs_active(pos))
atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
+ if (n_idx != idx)
+ up_write_kernfs_rwsem(pos);
+ }
/* deactivate and unlink the subtree node-by-node */
do {
@@ -1369,7 +1381,7 @@ static void __kernfs_remove(struct kernfs_node *kn)
* error paths without worrying about draining.
*/
if (kn->flags & KERNFS_ACTIVATED)
- kernfs_drain(pos);
+ kernfs_drain(pos, kn);
else
WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
@@ -1402,11 +1414,9 @@ static void __kernfs_remove(struct kernfs_node *kn)
*/
void kernfs_remove(struct kernfs_node *kn)
{
- struct kernfs_root *root = kernfs_root(kn);
-
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
__kernfs_remove(kn);
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
}
/**
@@ -1492,9 +1502,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
bool kernfs_remove_self(struct kernfs_node *kn)
{
bool ret;
- struct kernfs_root *root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
kernfs_break_active_protection(kn);
/*
@@ -1522,9 +1531,9 @@ bool kernfs_remove_self(struct kernfs_node *kn)
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
break;
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
schedule();
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
}
finish_wait(waitq, &wait);
WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
@@ -1537,7 +1546,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
*/
kernfs_unbreak_active_protection(kn);
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
return ret;
}
@@ -1554,7 +1563,6 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
const void *ns)
{
struct kernfs_node *kn;
- struct kernfs_root *root;
if (!parent) {
WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
@@ -1562,14 +1570,15 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
return -ENOENT;
}
- root = kernfs_root(parent);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(parent, LOCK_SELF, 0);
kn = kernfs_find_ns(parent, name, ns);
- if (kn)
+ up_write_kernfs_rwsem(parent);
+ if (kn) {
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
__kernfs_remove(kn);
-
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
+ }
if (kn)
return 0;
@@ -1588,37 +1597,65 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name, const void *new_ns)
{
struct kernfs_node *old_parent;
- struct kernfs_root *root;
const char *old_name = NULL;
- int error;
+ int error, idx, np_idx, p_idx;
/* can't move or rename root */
if (!kn->parent)
return -EINVAL;
- root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ /*
+ * Take lock of node's old (current) parent.
+ * If new parent has a different lock, then take that
+ * lock as well.
+ */
+ idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+ p_idx = hash_ptr(kn->parent, NR_KERNFS_LOCK_BITS);
+ np_idx = hash_ptr(new_parent, NR_KERNFS_LOCK_BITS);
+
+ /*
+ * Take only kn's lock. The subsequent kernfs_put
+ * may free up old_parent so if old_parent has a
+ * different lock, we will explicitly release that.
+ */
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
+
+ if (idx != np_idx) /* new parent hashes to different lock */
+ down_write_kernfs_rwsem(new_parent, LOCK_SELF, 1);
+
+ /* old_parent hashes to a different lock */
+ if (idx != p_idx && p_idx != np_idx)
+ down_write_kernfs_rwsem(kn->parent, LOCK_SELF, 2);
error = -ENOENT;
if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
- (new_parent->flags & KERNFS_EMPTY_DIR))
+ (new_parent->flags & KERNFS_EMPTY_DIR)) {
+ if (idx != p_idx && p_idx != np_idx)
+ up_write_kernfs_rwsem(kn->parent);
goto out;
-
+ }
error = 0;
if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
- (strcmp(kn->name, new_name) == 0))
+ (strcmp(kn->name, new_name) == 0)) {
+ if (idx != p_idx && p_idx != np_idx)
+ up_write_kernfs_rwsem(kn->parent);
goto out; /* nothing to rename */
-
+ }
error = -EEXIST;
- if (kernfs_find_ns(new_parent, new_name, new_ns))
+ if (kernfs_find_ns(new_parent, new_name, new_ns)) {
+ if (idx != p_idx && p_idx != np_idx)
+ up_write_kernfs_rwsem(kn->parent);
goto out;
-
+ }
/* rename kernfs_node */
if (strcmp(kn->name, new_name) != 0) {
error = -ENOMEM;
new_name = kstrdup_const(new_name, GFP_KERNEL);
- if (!new_name)
+ if (!new_name) {
+ if (idx != p_idx && p_idx != np_idx)
+ up_write_kernfs_rwsem(kn->parent);
goto out;
+ }
} else {
new_name = NULL;
}
@@ -1646,12 +1683,22 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
kn->hash = kernfs_name_hash(kn->name, kn->ns);
kernfs_link_sibling(kn);
+ /* Release old_parent's lock, if it is different */
+ if (idx != p_idx && p_idx != np_idx)
+ up_write_kernfs_rwsem(old_parent);
kernfs_put(old_parent);
kfree_const(old_name);
error = 0;
out:
- up_write(&root->kernfs_rwsem);
+ /*
+ * If new parent lock has been taken release it.
+ * Lastly release node's lock.
+ */
+ if (idx != np_idx) /* new parent hashes to different lock */
+ up_write_kernfs_rwsem(new_parent);
+
+ up_write_kernfs_rwsem(kn);
return error;
}
@@ -1670,9 +1717,20 @@ static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
static struct kernfs_node *kernfs_dir_pos(const void *ns,
struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
+ int idx, p_idx;
+
+ p_idx = hash_ptr(parent, NR_KERNFS_LOCK_BITS);
+ lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem[p_idx]);
if (pos) {
- int valid = kernfs_active(pos) &&
+ int valid = 0;
+
+ idx = hash_ptr(pos, NR_KERNFS_LOCK_BITS);
+ if (idx != p_idx)
+ down_read_kernfs_rwsem(pos, LOCK_SELF, 1);
+ valid = kernfs_active(pos) &&
pos->parent == parent && hash == pos->hash;
+ if (idx != p_idx)
+ up_read_kernfs_rwsem(pos);
kernfs_put(pos);
if (!valid)
pos = NULL;
@@ -1681,18 +1739,37 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
struct rb_node *node = parent->dir.children.rb_node;
while (node) {
pos = rb_to_kn(node);
-
+ idx = hash_ptr(pos, NR_KERNFS_LOCK_BITS);
+ if (idx != p_idx)
+ down_read_kernfs_rwsem(pos, LOCK_SELF, 1);
if (hash < pos->hash)
node = node->rb_left;
else if (hash > pos->hash)
node = node->rb_right;
- else
+ else {
+ if (idx != p_idx)
+ up_read_kernfs_rwsem(pos);
break;
+ }
+ if (idx != p_idx)
+ up_read_kernfs_rwsem(pos);
}
}
/* Skip over entries which are dying/dead or in the wrong namespace */
- while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
- struct rb_node *node = rb_next(&pos->rb);
+ while (pos) {
+ struct rb_node *node;
+
+ idx = hash_ptr(pos, NR_KERNFS_LOCK_BITS);
+ if (idx != p_idx)
+ down_read_kernfs_rwsem(pos, LOCK_SELF, 1);
+ if (kernfs_active(pos) && pos->ns == ns) {
+ if (idx != p_idx)
+ up_read_kernfs_rwsem(pos);
+ break;
+ }
+ node = rb_next(&pos->rb);
+ if (idx != p_idx)
+ up_read_kernfs_rwsem(pos);
if (!node)
pos = NULL;
else
@@ -1704,16 +1781,41 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
+ int idx, p_idx;
+ int unlock_node = 0;
+
+ p_idx = hash_ptr(parent, NR_KERNFS_LOCK_BITS);
+ lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem[p_idx]);
pos = kernfs_dir_pos(ns, parent, ino, pos);
if (pos) {
+ idx = hash_ptr(pos, NR_KERNFS_LOCK_BITS);
+ if (idx != p_idx)
+ down_read_kernfs_rwsem(pos, LOCK_SELF, 1);
do {
struct rb_node *node = rb_next(&pos->rb);
+ if (idx != p_idx) {
+ up_read_kernfs_rwsem(pos);
+ unlock_node = 0;
+ }
if (!node)
pos = NULL;
- else
+ else {
pos = rb_to_kn(node);
+ if (pos != NULL) {
+ idx = hash_ptr(pos,
+ NR_KERNFS_LOCK_BITS);
+ if (idx != p_idx) {
+ down_read_kernfs_rwsem(pos,
+ LOCK_SELF,
+ 1);
+ unlock_node = 1;
+ }
+ }
+ }
} while (pos && (!kernfs_active(pos) || pos->ns != ns));
}
+ if (unlock_node)
+ up_read_kernfs_rwsem(pos);
return pos;
}
@@ -1722,14 +1824,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
struct dentry *dentry = file->f_path.dentry;
struct kernfs_node *parent = kernfs_dentry_node(dentry);
struct kernfs_node *pos = file->private_data;
- struct kernfs_root *root;
const void *ns = NULL;
if (!dir_emit_dots(file, ctx))
return 0;
- root = kernfs_root(parent);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
if (kernfs_ns_enabled(parent))
ns = kernfs_info(dentry->d_sb)->ns;
@@ -1746,12 +1846,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
file->private_data = pos;
kernfs_get(pos);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
if (!dir_emit(ctx, name, len, ino, type))
return 0;
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
}
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
file->private_data = NULL;
ctx->pos = INT_MAX;
return 0;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 018d038b72fdd..5124add292582 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -855,8 +855,9 @@ static void kernfs_notify_workfn(struct work_struct *work)
root = kernfs_root(kn);
/* kick fsnotify */
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
+ down_write(&root->supers_rwsem);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct kernfs_node *parent;
struct inode *p_inode = NULL;
@@ -892,8 +893,9 @@ static void kernfs_notify_workfn(struct work_struct *work)
iput(inode);
}
+ up_write(&root->supers_rwsem);
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
kernfs_put(kn);
goto repeat;
}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d783d80f5daa..fa9a6a48119c0 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -99,11 +99,10 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
{
int ret;
- struct kernfs_root *root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
ret = __kernfs_setattr(kn, iattr);
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
return ret;
}
@@ -112,14 +111,12 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
struct kernfs_node *kn = inode->i_private;
- struct kernfs_root *root;
int error;
if (!kn)
return -EINVAL;
- root = kernfs_root(kn);
- down_write(&root->kernfs_rwsem);
+ down_write_kernfs_rwsem(kn, LOCK_SELF, 0);
error = setattr_prepare(&init_user_ns, dentry, iattr);
if (error)
goto out;
@@ -132,7 +129,7 @@ int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
setattr_copy(&init_user_ns, inode, iattr);
out:
- up_write(&root->kernfs_rwsem);
+ up_write_kernfs_rwsem(kn);
return error;
}
@@ -187,14 +184,13 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
{
struct inode *inode = d_inode(path->dentry);
struct kernfs_node *kn = inode->i_private;
- struct kernfs_root *root = kernfs_root(kn);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(kn, LOCK_SELF, 0);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
generic_fillattr(&init_user_ns, inode, stat);
spin_unlock(&inode->i_lock);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(kn);
return 0;
}
@@ -278,21 +274,19 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns,
struct inode *inode, int mask)
{
struct kernfs_node *kn;
- struct kernfs_root *root;
int ret;
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
kn = inode->i_private;
- root = kernfs_root(kn);
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(kn, LOCK_SELF, 0);
spin_lock(&inode->i_lock);
kernfs_refresh_inode(kn, inode);
ret = generic_permission(&init_user_ns, inode, mask);
spin_unlock(&inode->i_lock);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(kn);
return ret;
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index cc49a6cd94154..3f011b323173c 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -19,6 +19,9 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
+#define LOCK_SELF 0
+#define LOCK_SELF_AND_PARENT 1
+
struct kernfs_iattrs {
kuid_t ia_uid;
kgid_t ia_gid;
@@ -102,6 +105,115 @@ static inline bool kernfs_dir_changed(struct kernfs_node *parent,
return false;
}
+/*
+ * If both the node and its parent need locking,
+ * lock the child first so that kernfs_rename_ns
+ * does not change the parent, leaving us
+ * with a stale parent here.
+ */
+static inline void down_write_kernfs_rwsem(struct kernfs_node *kn,
+ u8 lock_parent,
+ u8 nesting)
+{
+ int idx, p_idx;
+ struct kernfs_root *root;
+
+ idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+ root = kernfs_root(kn);
+
+ down_write_nested(&root->kernfs_rwsem[idx], nesting);
+
+ kernfs_get(kn);
+
+ if (kn->parent)
+ p_idx = hash_ptr(kn->parent, NR_KERNFS_LOCK_BITS);
+
+ if (kn->parent && lock_parent && p_idx != idx) {
+ /*
+ * Node and parent hash to different locks.
+ * The node's lock has already been taken.
+ * Take the parent's lock and mark the parent for unlock.
+ */
+ down_write_nested(&root->kernfs_rwsem[p_idx],
+ nesting + 1);
+
+ kernfs_get(kn->parent);
+ kn->unlock_parent = 1;
+ }
+}
+
+static inline void up_write_kernfs_rwsem(struct kernfs_node *kn)
+{
+ int p_idx, idx;
+ struct kernfs_root *root;
+
+ /* node lock is already taken in down_xxx so kn->parent is safe */
+ p_idx = hash_ptr(kn->parent, NR_KERNFS_LOCK_BITS);
+ idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+ root = kernfs_root(kn);
+
+ if (kn->unlock_parent) {
+ kn->unlock_parent = 0;
+ up_write(&root->kernfs_rwsem[p_idx]);
+ kernfs_put(kn->parent);
+ }
+
+ up_write(&root->kernfs_rwsem[idx]);
+ kernfs_put(kn);
+}
+
+static inline void down_read_kernfs_rwsem(struct kernfs_node *kn,
+ u8 lock_parent,
+ u8 nesting)
+{
+ int idx, p_idx;
+ struct kernfs_root *root;
+
+ idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+ root = kernfs_root(kn);
+
+ down_read_nested(&root->kernfs_rwsem[idx], nesting);
+
+ kernfs_get(kn);
+
+ if (kn->parent)
+ p_idx = hash_ptr(kn->parent, NR_KERNFS_LOCK_BITS);
+
+ if (kn->parent && lock_parent && p_idx != idx) {
+ /*
+ * Node and parent hash to different locks.
+ * The node's lock has already been taken.
+ * Take the parent's lock and mark the parent for unlock.
+ */
+ down_read_nested(&root->kernfs_rwsem[p_idx],
+ nesting + 1);
+
+ kernfs_get(kn->parent);
+
+ kn->unlock_parent = 1;
+ }
+}
+
+static inline void up_read_kernfs_rwsem(struct kernfs_node *kn)
+{
+ int p_idx, idx;
+ struct kernfs_root *root;
+
+ /* node lock is already taken in down_xxx so kn->parent is safe */
+ p_idx = hash_ptr(kn->parent, NR_KERNFS_LOCK_BITS);
+ idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+ root = kernfs_root(kn);
+
+ if (kn->unlock_parent) {
+ kn->unlock_parent = 0;
+ up_read(&root->kernfs_rwsem[p_idx]);
+ kernfs_put(kn->parent);
+ }
+
+ up_read(&root->kernfs_rwsem[idx]);
+ kernfs_put(kn);
+}
+
extern const struct super_operations kernfs_sops;
extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index cfa79715fc1a7..ebb7d9a10f47e 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -236,7 +236,6 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
{
struct kernfs_super_info *info = kernfs_info(sb);
- struct kernfs_root *kf_root = kfc->root;
struct inode *inode;
struct dentry *root;
@@ -256,9 +255,9 @@ static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *k
sb->s_shrink.seeks = 0;
/* get root inode, initialize and unlock it */
- down_read(&kf_root->kernfs_rwsem);
+ down_read_kernfs_rwsem(info->root->kn, 0, 0);
inode = kernfs_get_inode(sb, info->root->kn);
- up_read(&kf_root->kernfs_rwsem);
+ up_read_kernfs_rwsem(info->root->kn);
if (!inode) {
pr_debug("kernfs: could not get root inode\n");
return -ENOMEM;
@@ -346,9 +345,9 @@ int kernfs_get_tree(struct fs_context *fc)
}
sb->s_flags |= SB_ACTIVE;
- down_write(&root->kernfs_rwsem);
+ down_write(&root->supers_rwsem);
list_add(&info->node, &info->root->supers);
- up_write(&root->kernfs_rwsem);
+ up_write(&root->supers_rwsem);
}
fc->root = dget(sb->s_root);
@@ -375,9 +374,9 @@ void kernfs_kill_sb(struct super_block *sb)
struct kernfs_super_info *info = kernfs_info(sb);
struct kernfs_root *root = info->root;
- down_write(&root->kernfs_rwsem);
+ down_write(&root->supers_rwsem);
list_del(&info->node);
- up_write(&root->kernfs_rwsem);
+ up_write(&root->supers_rwsem);
/*
* Remove the superblock from fs_supers/s_instances
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 0ab13824822f7..5d4a769e2ab1e 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -113,12 +113,11 @@ static int kernfs_getlink(struct inode *inode, char *path)
struct kernfs_node *kn = inode->i_private;
struct kernfs_node *parent = kn->parent;
struct kernfs_node *target = kn->symlink.target_kn;
- struct kernfs_root *root = kernfs_root(parent);
int error;
- down_read(&root->kernfs_rwsem);
+ down_read_kernfs_rwsem(parent, LOCK_SELF, 0);
error = kernfs_get_target_path(parent, target, path);
- up_read(&root->kernfs_rwsem);
+ up_read_kernfs_rwsem(parent);
return error;
}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 5bf9f02ce9dce..3b3c3e0b44083 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -179,6 +179,7 @@ struct kernfs_node {
*/
struct kernfs_node *parent;
const char *name;
+ u8 unlock_parent; /* release parent's rwsem */
struct rb_node rb;
@@ -237,9 +238,10 @@ struct kernfs_root {
struct list_head supers;
wait_queue_head_t deactivate_waitq;
- struct rw_semaphore kernfs_rwsem;
struct kernfs_open_node_lock open_node_locks[NR_KERNFS_LOCKS];
struct kernfs_open_file_mutex open_file_mutex[NR_KERNFS_LOCKS];
+ struct rw_semaphore supers_rwsem;
+ struct rw_semaphore kernfs_rwsem[NR_KERNFS_LOCKS];
};
struct kernfs_open_file {
@@ -619,5 +621,4 @@ static inline int kernfs_rename(struct kernfs_node *kn,
{
return kernfs_rename_ns(kn, new_parent, new_name, NULL);
}
-
#endif /* __LINUX_KERNFS_H */
--
2.30.2
^ permalink raw reply related [flat|nested] 10+ messages in thread