* [PATCH] cpumask: fix lg_lock/br_lock. @ 2012-02-27 23:22 Rusty Russell 2012-02-27 23:53 ` Andrew Morton 0 siblings, 1 reply; 45+ messages in thread From: Rusty Russell @ 2012-02-27 23:22 UTC (permalink / raw) To: Nick Piggin Cc: linux-kernel, Alexander Viro, Andrew Morton, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel Use a cpumask_var_t instead of cpumask_t. We're doing plenty of allocations here anyway, so it's not really an issue, and it sets a good example. (cpumask_t is obsolescent, as are the cpus_* functions). Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- fs/file_table.c | 2 +- fs/namespace.c | 4 +++- include/linux/lglock.h | 23 +++++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c --- a/fs/file_table.c +++ b/fs/file_table.c @@ -526,6 +526,6 @@ void __init files_init(unsigned long mem n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(files_lglock, GFP_KERNEL); percpu_counter_init(&nr_files, 0); } diff --git a/fs/namespace.c b/fs/namespace.c --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2576,7 +2576,9 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + err = br_lock_init(vfsmount_lock, GFP_KERNEL); + if (err) + panic("Failed to init vfsmount_lock: %d\n", err); err = sysfs_init(); if (err) diff --git a/include/linux/lglock.h b/include/linux/lglock.h --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -25,7 +25,7 @@ #include <linux/cpu.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() +#define br_lock_init(name, gfp) name##_lock_init(gfp) #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() #define br_write_lock(name) name##_global_lock_online() @@ -35,7 +35,7 
@@ #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) -#define lg_lock_init(name) name##_lock_init() +#define lg_lock_init(name, gfp) name##_lock_init(gfp) #define lg_local_lock(name) name##_local_lock() #define lg_local_unlock(name) name##_local_unlock() #define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) @@ -61,7 +61,7 @@ #define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ + extern int name##_lock_init(gfp_t gfp); \ extern void name##_local_lock(void); \ extern void name##_local_unlock(void); \ extern void name##_local_lock_cpu(int cpu); \ @@ -74,7 +74,7 @@ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ + cpumask_var_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ @@ -85,12 +85,12 @@ switch (action & ~CPU_TASKS_FROZEN) { \ case CPU_UP_PREPARE: \ spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ + cpumask_set_cpu((unsigned long)hcpu, name##_cpus); \ spin_unlock(&name##_cpu_lock); \ break; \ case CPU_UP_CANCELED: case CPU_DEAD: \ spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ + cpumask_clear_cpu((unsigned long)hcpu, name##_cpus); \ spin_unlock(&name##_cpu_lock); \ } \ return NOTIFY_OK; \ @@ -98,7 +98,7 @@ static struct notifier_block name##_lg_cpu_notifier = { \ .notifier_call = name##_lg_cpu_callback, \ }; \ - void name##_lock_init(void) { \ + int name##_lock_init(gfp_t gfp) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ for_each_possible_cpu(i) { \ @@ -107,10 +107,13 @@ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ register_hotcpu_notifier(&name##_lg_cpu_notifier); \ + if (!alloc_cpumask_var(&name##_cpus, gfp)) \ + return -ENOMEM; \ get_online_cpus(); \ for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ + cpumask_set_cpu(i, name##_cpus); \ put_online_cpus(); \ + return 0; \ } \ 
EXPORT_SYMBOL(name##_lock_init); \ \ @@ -154,7 +157,7 @@ int i; \ spin_lock(&name##_cpu_lock); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ + for_each_cpu(i, name##_cpus) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -165,7 +168,7 @@ void name##_global_unlock_online(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ + for_each_cpu(i, name##_cpus) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-27 23:22 [PATCH] cpumask: fix lg_lock/br_lock Rusty Russell @ 2012-02-27 23:53 ` Andrew Morton 0 siblings, 0 replies; 45+ messages in thread From: Andrew Morton @ 2012-02-27 23:53 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel On Tue, 28 Feb 2012 09:52:30 +1030 Rusty Russell <rusty@rustcorp.com.au> wrote: > Use a cpumask_var_t instead of cpumask_t. We're doing plenty of > allocations here anyway, so it's not really an issue, and it sets a > good example. > > (cpumask_t is obsolescent, as are the cpus_* functions). Congratulations to yourself and Andi: patching file fs/file_table.c Hunk #1 FAILED at 526. 1 out of 1 hunk FAILED -- saving rejects to file fs/file_table.c.rej patching file fs/namespace.c Hunk #1 FAILED at 2576. 1 out of 1 hunk FAILED -- saving rejects to file fs/namespace.c.rej patching file include/linux/lglock.h Hunk #1 FAILED at 25. Hunk #2 FAILED at 35. Hunk #3 FAILED at 61. Hunk #4 FAILED at 74. Hunk #5 FAILED at 85. Hunk #6 FAILED at 98. Hunk #7 FAILED at 107. Hunk #8 FAILED at 154. Hunk #9 FAILED at 165. 9 out of 9 hunks FAILED -- saving rejects to file include/linux/lglock.h.rej Failed to apply cpumask-fix-lg_lock-br_lock a clean sweep! It's due to brlocks-lglocks-cleanups.patch, which I didn't think to cc you on. Thoughts? From: Andi Kleen <ak@linux.intel.com> Subject: brlocks/lglocks: cleanups lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier to maintain than a macro forest. 
This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) In general the users now look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- fs/dcache.c | 4 fs/file_table.c | 17 +-- fs/internal.h | 3 fs/namei.c | 24 ++-- fs/namespace.c | 140 ++++++++++++++-------------- fs/pnode.c | 4 fs/proc_namespace.c | 4 include/linux/lglock.h | 189 +++++++-------------------------------- kernel/Makefile | 2 kernel/lglock.c | 136 ++++++++++++++++++++++++++++ 10 files changed, 269 insertions(+), 254 deletions(-) diff -puN fs/dcache.c~brlocks-lglocks-cleanups fs/dcache.c --- a/fs/dcache.c~brlocks-lglocks-cleanups +++ a/fs/dcache.c @@ -2442,7 +2442,7 @@ static int prepend_path(const struct pat bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2473,7 +2473,7 @@ static int prepend_path(const struct pat error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff -puN fs/file_table.c~brlocks-lglocks-cleanups fs/file_table.c --- a/fs/file_table.c~brlocks-lglocks-cleanups +++ a/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ @@ -421,9 +420,9 @@ static inline void __file_sb_list_add(st */ void 
file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -436,9 +435,9 @@ void file_sb_list_add(struct file *file, void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -485,7 +484,7 @@ void mark_files_ro(struct super_block *s struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -502,12 +501,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. 
*/ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -525,6 +524,6 @@ void __init files_init(unsigned long mem n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff -puN fs/internal.h~brlocks-lglocks-cleanups fs/internal.h --- a/fs/internal.h~brlocks-lglocks-cleanups +++ a/fs/internal.h @@ -56,8 +56,7 @@ extern int sb_prepare_remount_readonly(s extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); - +extern struct lglock vfsmount_lock; /* * fs_struct.c diff -puN fs/namei.c~brlocks-lglocks-cleanups fs/namei.c --- a/fs/namei.c~brlocks-lglocks-cleanups +++ a/fs/namei.c @@ -462,7 +462,7 @@ static int unlazy_walk(struct nameidata mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -520,14 +520,14 @@ static int complete_walk(struct nameidat if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -694,15 +694,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); 
mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -960,7 +960,7 @@ failed: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1256,7 +1256,7 @@ static void terminate_walk(struct nameid if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1490,7 +1490,7 @@ static int path_init(int dfd, const char nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1503,7 +1503,7 @@ static int path_init(int dfd, const char if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1516,7 +1516,7 @@ static int path_init(int dfd, const char struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1552,7 +1552,7 @@ static int path_init(int dfd, const char if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff -puN fs/namespace.c~brlocks-lglocks-cleanups fs/namespace.c --- a/fs/namespace.c~brlocks-lglocks-cleanups +++ a/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct moun { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct moun */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct s if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct s if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type * mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mo mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = 
mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct moun { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock 
needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *he struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mn q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmou { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct m if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int 
attach_recursive_mnt(struct m list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *p goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - 
br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_ return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_ touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2249,10 +2250,9 @@ static struct mnt_namespace *dup_mnt_ns( kfree(new_ns); return ERR_PTR(-ENOMEM); } - new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2416,9 @@ bool is_path_reachable(struct mount *mnt int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2505,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, 
&parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2513,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2576,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2596,9 @@ void put_mnt_ns(struct mnt_namespace *ns if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff -puN fs/pnode.c~brlocks-lglocks-cleanups fs/pnode.c --- a/fs/pnode.c~brlocks-lglocks-cleanups +++ a/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff -puN fs/proc_namespace.c~brlocks-lglocks-cleanups fs/proc_namespace.c --- a/fs/proc_namespace.c~brlocks-lglocks-cleanups +++ a/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } diff -puN 
include/linux/lglock.h~brlocks-lglocks-cleanups include/linux/lglock.h --- a/include/linux/lglock.h~brlocks-lglocks-cleanups +++ a/include/linux/lglock.h @@ -23,28 +23,17 @@ #include <linux/lockdep.h> #include <linux/percpu.h> #include <linux/cpu.h> +#include <linux/notifier.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock_online(name) +#define br_write_unlock(name) lg_global_unlock_online(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -59,142 +48,34 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif +struct lglock { + arch_spinlock_t __percpu *lock; + cpumask_t cpus; /* XXX need to put on separate cacheline? 
*/ + spinlock_t cpu_lock; + struct notifier_block cpu_notifier; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; + +#define DEFINE_LGLOCK(name) \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock, \ + .cpu_lock = __SPIN_LOCK_UNLOCKED(cpu_lock), \ + .cpu_notifier.notifier_call = lg_cpu_callback } + +/* Only valid for statics */ +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock_online(struct lglock *lg); +void lg_global_unlock_online(struct lglock *lg); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + +int lg_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu); -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ - -#define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); 
\ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = 
&per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); #endif diff -puN kernel/Makefile~brlocks-lglocks-cleanups kernel/Makefile --- a/kernel/Makefile~brlocks-lglocks-cleanups +++ a/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff -puN /dev/null kernel/lglock.c --- /dev/null +++ a/kernel/lglock.c @@ -0,0 +1,136 @@ +/* See include/linux/lglock.h for description */ +#include <linux/module.h> +#include <linux/lglock.h> +#include <linux/cpu.h> +#include <linux/string.h> + +/* Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, 
char *name) +{ + int i; + + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); + + register_hotcpu_notifier(&lg->cpu_notifier); + get_online_cpus(); + for_each_online_cpu (i) + cpu_set(i, lg->cpus); + put_online_cpus(); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock_online(struct lglock *lg) +{ + int i; + spin_lock(&lg->cpu_lock); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_cpu(i, &lg->cpus) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock_online); + +void lg_global_unlock_online(struct lglock *lg) +{ + int i; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_cpu(i, &lg->cpus) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + spin_unlock(&lg->cpu_lock); +} +EXPORT_SYMBOL(lg_global_unlock_online); + +void lg_global_lock(struct lglock *lg) +{ + int i; + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + 
for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); + +int lg_cpu_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + struct lglock *lglock = container_of(nb, struct lglock, cpu_notifier); + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + spin_lock(&lglock->cpu_lock); + cpu_set((unsigned long)hcpu, lglock->cpus); + spin_unlock(&lglock->cpu_lock); + break; + case CPU_UP_CANCELED: case CPU_DEAD: + spin_lock(&lglock->cpu_lock); + cpu_clear((unsigned long)hcpu, lglock->cpus); + spin_unlock(&lglock->cpu_lock); + } + return NOTIFY_OK; +} +EXPORT_SYMBOL(lg_cpu_callback); + _ ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. @ 2012-02-27 23:53 ` Andrew Morton 0 siblings, 0 replies; 45+ messages in thread From: Andrew Morton @ 2012-02-27 23:53 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel On Tue, 28 Feb 2012 09:52:30 +1030 Rusty Russell <rusty@rustcorp.com.au> wrote: > Use a cpumask_var_t instead of cpumask_t. We're doing plenty of > allocations here anyway, so it's not really an issue, and it sets a > good example. > > (cpumask_t is obsolescent, as are the cpus_* functions). Congratulations to yourself and Andi: patching file fs/file_table.c Hunk #1 FAILED at 526. 1 out of 1 hunk FAILED -- saving rejects to file fs/file_table.c.rej patching file fs/namespace.c Hunk #1 FAILED at 2576. 1 out of 1 hunk FAILED -- saving rejects to file fs/namespace.c.rej patching file include/linux/lglock.h Hunk #1 FAILED at 25. Hunk #2 FAILED at 35. Hunk #3 FAILED at 61. Hunk #4 FAILED at 74. Hunk #5 FAILED at 85. Hunk #6 FAILED at 98. Hunk #7 FAILED at 107. Hunk #8 FAILED at 154. Hunk #9 FAILED at 165. 9 out of 9 hunks FAILED -- saving rejects to file include/linux/lglock.h.rej Failed to apply cpumask-fix-lg_lock-br_lock a clean sweep! It's due to brlocks-lglocks-cleanups.patch, which I didn't think to cc you on. Thoughts? From: Andi Kleen <ak@linux.intel.com> Subject: brlocks/lglocks: cleanups lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. 
This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) In general the users now look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> --- fs/dcache.c | 4 fs/file_table.c | 17 +-- fs/internal.h | 3 fs/namei.c | 24 ++-- fs/namespace.c | 140 ++++++++++++++-------------- fs/pnode.c | 4 fs/proc_namespace.c | 4 include/linux/lglock.h | 189 +++++++-------------------------------- kernel/Makefile | 2 kernel/lglock.c | 136 ++++++++++++++++++++++++++++ 10 files changed, 269 insertions(+), 254 deletions(-) diff -puN fs/dcache.c~brlocks-lglocks-cleanups fs/dcache.c --- a/fs/dcache.c~brlocks-lglocks-cleanups +++ a/fs/dcache.c @@ -2442,7 +2442,7 @@ static int prepend_path(const struct pat bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2473,7 +2473,7 @@ static int prepend_path(const struct pat error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff -puN fs/file_table.c~brlocks-lglocks-cleanups fs/file_table.c --- a/fs/file_table.c~brlocks-lglocks-cleanups +++ a/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ @@ -421,9 +420,9 @@ static inline void __file_sb_list_add(st */ void 
file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -436,9 +435,9 @@ void file_sb_list_add(struct file *file, void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -485,7 +484,7 @@ void mark_files_ro(struct super_block *s struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -502,12 +501,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. 
*/ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -525,6 +524,6 @@ void __init files_init(unsigned long mem n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff -puN fs/internal.h~brlocks-lglocks-cleanups fs/internal.h --- a/fs/internal.h~brlocks-lglocks-cleanups +++ a/fs/internal.h @@ -56,8 +56,7 @@ extern int sb_prepare_remount_readonly(s extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); - +extern struct lglock vfsmount_lock; /* * fs_struct.c diff -puN fs/namei.c~brlocks-lglocks-cleanups fs/namei.c --- a/fs/namei.c~brlocks-lglocks-cleanups +++ a/fs/namei.c @@ -462,7 +462,7 @@ static int unlazy_walk(struct nameidata mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -520,14 +520,14 @@ static int complete_walk(struct nameidat if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -694,15 +694,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); 
mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -960,7 +960,7 @@ failed: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1256,7 +1256,7 @@ static void terminate_walk(struct nameid if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1490,7 +1490,7 @@ static int path_init(int dfd, const char nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1503,7 +1503,7 @@ static int path_init(int dfd, const char if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1516,7 +1516,7 @@ static int path_init(int dfd, const char struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1552,7 +1552,7 @@ static int path_init(int dfd, const char if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff -puN fs/namespace.c~brlocks-lglocks-cleanups fs/namespace.c --- a/fs/namespace.c~brlocks-lglocks-cleanups +++ a/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct moun { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct moun */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct s if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct s if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type * mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mo mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = 
mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct moun { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock 
needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *he struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mn q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmou { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct m if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int 
attach_recursive_mnt(struct m list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *p goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - 
br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_ return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_ touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2249,10 +2250,9 @@ static struct mnt_namespace *dup_mnt_ns( kfree(new_ns); return ERR_PTR(-ENOMEM); } - new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2416,9 @@ bool is_path_reachable(struct mount *mnt int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2505,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, 
&parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2513,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2576,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2596,9 @@ void put_mnt_ns(struct mnt_namespace *ns if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff -puN fs/pnode.c~brlocks-lglocks-cleanups fs/pnode.c --- a/fs/pnode.c~brlocks-lglocks-cleanups +++ a/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff -puN fs/proc_namespace.c~brlocks-lglocks-cleanups fs/proc_namespace.c --- a/fs/proc_namespace.c~brlocks-lglocks-cleanups +++ a/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } diff -puN 
include/linux/lglock.h~brlocks-lglocks-cleanups include/linux/lglock.h --- a/include/linux/lglock.h~brlocks-lglocks-cleanups +++ a/include/linux/lglock.h @@ -23,28 +23,17 @@ #include <linux/lockdep.h> #include <linux/percpu.h> #include <linux/cpu.h> +#include <linux/notifier.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock_online(name) +#define br_write_unlock(name) lg_global_unlock_online(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -59,142 +48,34 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif +struct lglock { + arch_spinlock_t __percpu *lock; + cpumask_t cpus; /* XXX need to put on separate cacheline? 
*/ + spinlock_t cpu_lock; + struct notifier_block cpu_notifier; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; + +#define DEFINE_LGLOCK(name) \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock, \ + .cpu_lock = __SPIN_LOCK_UNLOCKED(cpu_lock), \ + .cpu_notifier.notifier_call = lg_cpu_callback } + +/* Only valid for statics */ +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock_online(struct lglock *lg); +void lg_global_unlock_online(struct lglock *lg); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + +int lg_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu); -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ - -#define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); 
\ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = 
&per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); #endif diff -puN kernel/Makefile~brlocks-lglocks-cleanups kernel/Makefile --- a/kernel/Makefile~brlocks-lglocks-cleanups +++ a/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff -puN /dev/null kernel/lglock.c --- /dev/null +++ a/kernel/lglock.c @@ -0,0 +1,136 @@ +/* See include/linux/lglock.h for description */ +#include <linux/module.h> +#include <linux/lglock.h> +#include <linux/cpu.h> +#include <linux/string.h> + +/* Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, 
char *name) +{ + int i; + + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); + + register_hotcpu_notifier(&lg->cpu_notifier); + get_online_cpus(); + for_each_online_cpu (i) + cpu_set(i, lg->cpus); + put_online_cpus(); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock_online(struct lglock *lg) +{ + int i; + spin_lock(&lg->cpu_lock); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_cpu(i, &lg->cpus) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock_online); + +void lg_global_unlock_online(struct lglock *lg) +{ + int i; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_cpu(i, &lg->cpus) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + spin_unlock(&lg->cpu_lock); +} +EXPORT_SYMBOL(lg_global_unlock_online); + +void lg_global_lock(struct lglock *lg) +{ + int i; + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + 
for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); + +int lg_cpu_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + struct lglock *lglock = container_of(nb, struct lglock, cpu_notifier); + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + spin_lock(&lglock->cpu_lock); + cpu_set((unsigned long)hcpu, lglock->cpus); + spin_unlock(&lglock->cpu_lock); + break; + case CPU_UP_CANCELED: case CPU_DEAD: + spin_lock(&lglock->cpu_lock); + cpu_clear((unsigned long)hcpu, lglock->cpus); + spin_unlock(&lglock->cpu_lock); + } + return NOTIFY_OK; +} +EXPORT_SYMBOL(lg_cpu_callback); + _ ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-27 23:53 ` Andrew Morton (?) @ 2012-02-28 8:43 ` Ingo Molnar 2012-02-28 11:25 ` Andi Kleen 2012-02-28 21:27 ` Andrew Morton -1 siblings, 2 replies; 45+ messages in thread From: Ingo Molnar @ 2012-02-28 8:43 UTC (permalink / raw) To: Andrew Morton Cc: Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel * Andrew Morton <akpm@linux-foundation.org> wrote: > From: Andi Kleen <ak@linux.intel.com> > Subject: brlocks/lglocks: cleanups This patch, while I agree with the goal of de-CPP-ing the lglocks code, violates my sense of taste and logic in numerous ways. If we touch this code it should be done right. This patch should also probably go upstream through the locking/lockdep tree? Mind sending it us once you think it's ready? > > fs/dcache.c | 4 > fs/file_table.c | 17 +-- > fs/internal.h | 3 > fs/namei.c | 24 ++-- > fs/namespace.c | 140 ++++++++++++++-------------- > fs/pnode.c | 4 > fs/proc_namespace.c | 4 > include/linux/lglock.h | 189 +++++++-------------------------------- > kernel/Makefile | 2 > kernel/lglock.c | 136 ++++++++++++++++++++++++++++ > 10 files changed, 269 insertions(+), 254 deletions(-) It's absolutely crazy to do such a large patch that both uninlines the code and changes the API. There is absolutely no good reason to do it like that - and the resulting merge pain on Andrew is a direct result of that: had it been kept separate it would be much easier and safer to update just the API change patch ... Do those things in at least two patches, making the bigger API change patch plain and *OBVIOUS*. > +struct lglock { > + arch_spinlock_t __percpu *lock; > + cpumask_t cpus; /* XXX need to put on separate cacheline? */ > + spinlock_t cpu_lock; > + struct notifier_block cpu_notifier; > +#ifdef CONFIG_DEBUG_LOCK_ALLOC > + struct lock_class_key lock_key; > + struct lockdep_map lock_dep_map; > +#endif > +}; This is not a readable structure definition. 
Plenty of readable ones exist in the tree, they should be inspected for clues. > +/* Only valid for statics */ what does this comment mean? > +void lg_lock_init(struct lglock *lg, char *name); > +void lg_local_lock(struct lglock *lg); > +void lg_local_unlock(struct lglock *lg); > +void lg_local_lock_cpu(struct lglock *lg, int cpu); > +void lg_local_unlock_cpu(struct lglock *lg, int cpu); > +void lg_global_lock_online(struct lglock *lg); > +void lg_global_unlock_online(struct lglock *lg); > +void lg_global_lock(struct lglock *lg); > +void lg_global_unlock(struct lglock *lg); > + > +int lg_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu); These APIs lack documentation. > +/* Note there is no uninit, so lglocks cannot be defined in > + * modules (but it's fine to use them from there) > + * Could be added though, just undo lg_lock_init > + */ I'm sure Andi knows what the standard shape of multi-line comments is in the kernel? Why has he still not learned? > +void lg_local_lock(struct lglock *lg) > +{ > + arch_spinlock_t *lock; > + preempt_disable(); I'm sure Andi knows what's wrong here, he has been reviewed dozens of times for similar mistakes in the x86 trees. > +void lg_local_unlock(struct lglock *lg) > +{ > + arch_spinlock_t *lock; > + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); ditto. > +void lg_local_lock_cpu(struct lglock *lg, int cpu) > +{ > + arch_spinlock_t *lock; > + preempt_disable(); ditto. > + arch_spinlock_t *lock; > + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); ditto. > +{ > + int i; > + spin_lock(&lg->cpu_lock); ditto. > +{ > + int i; > + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); ditto. > +{ > + int i; > + preempt_disable(); ditto. > +{ > + int i; > + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); ditto. > +int lg_cpu_callback(struct notifier_block *nb, > + unsigned long action, void *hcpu) ugly linebreak, we can do better. 
> +{ > + struct lglock *lglock = container_of(nb, struct lglock, cpu_notifier); > + switch (action & ~CPU_TASKS_FROZEN) { sigh. Until these defects (and probably more, haven't done a full check) are fixed: Nacked-by: Ingo Molnar <mingo@elte.hu> Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 8:43 ` Ingo Molnar @ 2012-02-28 11:25 ` Andi Kleen 2012-02-28 12:51 ` Ingo Molnar 2012-02-28 21:27 ` Andrew Morton 1 sibling, 1 reply; 45+ messages in thread From: Andi Kleen @ 2012-02-28 11:25 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel As far as I can tell from skimming that lengthy rant it's only about some white space. So the patch must be perfect in substance! -Andi ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 11:25 ` Andi Kleen @ 2012-02-28 12:51 ` Ingo Molnar 0 siblings, 0 replies; 45+ messages in thread From: Ingo Molnar @ 2012-02-28 12:51 UTC (permalink / raw) To: Andi Kleen Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel * Andi Kleen <ak@linux.intel.com> wrote: > As far as I can tell from skimming that lengthy rant it's only > about some white space. Read it again. Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 8:43 ` Ingo Molnar 2012-02-28 11:25 ` Andi Kleen @ 2012-02-28 21:27 ` Andrew Morton 2012-02-29 5:44 ` Srivatsa S. Bhat 2012-02-29 8:29 ` [PATCH] cpumask: fix lg_lock/br_lock Ingo Molnar 1 sibling, 2 replies; 45+ messages in thread From: Andrew Morton @ 2012-02-28 21:27 UTC (permalink / raw) To: Ingo Molnar Cc: Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel On Tue, 28 Feb 2012 09:43:59 +0100 Ingo Molnar <mingo@elte.hu> wrote: > This patch should also probably go upstream through the > locking/lockdep tree? Mind sending it us once you think it's > ready? Oh goody, that means you own http://marc.info/?l=linux-kernel&m=131419353511653&w=2. I do think the brlock code is sick, but Nick has turned into an ex-Nick and nobody works on it. ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 21:27 ` Andrew Morton @ 2012-02-29 5:44 ` Srivatsa S. Bhat 2012-02-29 9:17 ` Ingo Molnar 2012-02-29 8:29 ` [PATCH] cpumask: fix lg_lock/br_lock Ingo Molnar 1 sibling, 1 reply; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-02-29 5:44 UTC (permalink / raw) To: Andrew Morton Cc: Ingo Molnar, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel Hi Andrew, On 02/29/2012 02:57 AM, Andrew Morton wrote: > On Tue, 28 Feb 2012 09:43:59 +0100 > Ingo Molnar <mingo@elte.hu> wrote: > >> This patch should also probably go upstream through the >> locking/lockdep tree? Mind sending it us once you think it's >> ready? > > Oh goody, that means you own > http://marc.info/?l=linux-kernel&m=131419353511653&w=2. > That bug got fixed sometime around Dec 2011. See commit e30e2fdf (VFS: Fix race between CPU hotplug and lglocks) > I do think the brlock code is sick, but Nick has turned into an ex-Nick > and nobody works on it. > Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 5:44 ` Srivatsa S. Bhat @ 2012-02-29 9:17 ` Ingo Molnar 2012-02-29 11:12 ` Srivatsa S. Bhat 0 siblings, 1 reply; 45+ messages in thread From: Ingo Molnar @ 2012-02-29 9:17 UTC (permalink / raw) To: Srivatsa S. Bhat Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > Hi Andrew, > > On 02/29/2012 02:57 AM, Andrew Morton wrote: > > > On Tue, 28 Feb 2012 09:43:59 +0100 > > Ingo Molnar <mingo@elte.hu> wrote: > > > >> This patch should also probably go upstream through the > >> locking/lockdep tree? Mind sending it us once you think it's > >> ready? > > > > Oh goody, that means you own > > http://marc.info/?l=linux-kernel&m=131419353511653&w=2. > > > > > That bug got fixed sometime around Dec 2011. See commit e30e2fdf > (VFS: Fix race between CPU hotplug and lglocks) The lglocks code is still CPU-hotplug racy AFAICS, despite the ->cpu_lock complication: Consider a taken global lock on a CPU: CPU#1 ... br_write_lock(vfsmount_lock); this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now CPU#3 comes online and takes the read lock: CPU#3 br_read_lock(vfsmount_lock); This will succeed while the br_write_lock() is still active, because CPU#1 has only taken the locks of CPU#1 and CPU#2. Crash! The proper fix would be for CPU-online to serialize with all known lglocks, via the notifier callback, i.e. to do something like this: case CPU_UP_PREPARE: for_each_online_cpu(cpu) { spin_lock(&name##_cpu_lock); spin_unlock(&name##_cpu_lock); } ... I.e. 
in essence do: case CPU_UP_PREPARE: name##_global_lock_online(); name##_global_unlock_online(); Another detail I noticed, this bit: register_hotcpu_notifier(&name##_lg_cpu_notifier); \ get_online_cpus(); \ for_each_online_cpu(i) \ cpu_set(i, name##_cpus); \ put_online_cpus(); \ could be something simpler and loop-less, like: get_online_cpus(); cpumask_copy(name##_cpus, cpu_online_mask); register_hotcpu_notifier(&name##_lg_cpu_notifier); put_online_cpus(); Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 9:17 ` Ingo Molnar @ 2012-02-29 11:12 ` Srivatsa S. Bhat 2012-03-01 7:38 ` Ingo Molnar 2012-03-01 8:12 ` Srivatsa S. Bhat 0 siblings, 2 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-02-29 11:12 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven On 02/29/2012 02:47 PM, Ingo Molnar wrote: > > * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > >> Hi Andrew, >> >> On 02/29/2012 02:57 AM, Andrew Morton wrote: >> >>> On Tue, 28 Feb 2012 09:43:59 +0100 >>> Ingo Molnar <mingo@elte.hu> wrote: >>> >>>> This patch should also probably go upstream through the >>>> locking/lockdep tree? Mind sending it us once you think it's >>>> ready? >>> >>> Oh goody, that means you own >>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. >>> >> >> >> That bug got fixed sometime around Dec 2011. See commit e30e2fdf >> (VFS: Fix race between CPU hotplug and lglocks) > > The lglocks code is still CPU-hotplug racy AFAICS, despite the > ->cpu_lock complication: > > Consider a taken global lock on a CPU: > > CPU#1 > ... > br_write_lock(vfsmount_lock); > > this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now > CPU#3 comes online and takes the read lock: CPU#3 cannot come online! :-) No new CPU can come online until that corresponding br_write_unlock() is completed. That is because br_write_lock acquires &name##_cpu_lock and only br_write_unlock will release it. And, CPU_UP_PREPARE callback tries to acquire that very same spinlock, and hence will keep spinning until br_write_unlock() is run. And hence, the CPU#3 or any new CPU online for that matter will not complete until br_write_unlock() is done. It is of course debatable as to how good this design really is, but IMHO, the lglocks code is not CPU-hotplug racy now.. 
Here is the link to the original discussion during the development of that patch: http://thread.gmane.org/gmane.linux.file-systems/59750/ > > CPU#3 > br_read_lock(vfsmount_lock); > > This will succeed while the br_write_lock() is still active, > because CPU#1 has only taken the locks of CPU#1 and CPU#2. > > Crash! > > The proper fix would be for CPU-online to serialize with all > known lglocks, via the notifier callback, i.e. to do something > like this: > > case CPU_UP_PREPARE: > for_each_online_cpu(cpu) { > spin_lock(&name##_cpu_lock); > spin_unlock(&name##_cpu_lock); > } > ... > > I.e. in essence do: > > case CPU_UP_PREPARE: > name##_global_lock_online(); > name##_global_unlock_online(); > > Another detail I noticed, this bit: > > register_hotcpu_notifier(&name##_lg_cpu_notifier); \ > get_online_cpus(); \ > for_each_online_cpu(i) \ > cpu_set(i, name##_cpus); \ > put_online_cpus(); \ > > could be something simpler and loop-less, like: > > get_online_cpus(); > cpumask_copy(name##_cpus, cpu_online_mask); > register_hotcpu_notifier(&name##_lg_cpu_notifier); > put_online_cpus(); > While the cpumask_copy is definitely better, we can't put the register_hotcpu_notifier() within get/put_online_cpus() because it will lead to ABBA deadlock with a newly initiated CPU Hotplug operation, the 2 locks involved being the cpu_add_remove_lock and the cpu_hotplug lock. IOW, at the moment there is no "absolutely race-free" way to do CPU Hotplug callback registration. Some time ago, while going through the asynchronous booting patch by Arjan [1] I had written up a patch to fix that race because that race got transformed from "purely theoretical" to "very real" with the async boot patch, as shown by the powerpc boot failures [2]. But then I stopped short of posting that patch to the lists because I started wondering how important that race would actually turn out to be, in case the async booting design takes a totally different approach altogether.. 
[And the reason why I didn't post it is also because it would require lots of changes in many parts where CPU Hotplug registration is done, and that wouldn't probably be justified (I don't know..) if the race remained only theoretical, as it is now.] [1]. http://thread.gmane.org/gmane.linux.kernel/1246209 [2]. https://lkml.org/lkml/2012/2/13/383 Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 11:12 ` Srivatsa S. Bhat @ 2012-03-01 7:38 ` Ingo Molnar 2012-03-01 9:15 ` Srivatsa S. Bhat 2012-03-01 8:12 ` Srivatsa S. Bhat 1 sibling, 1 reply; 45+ messages in thread From: Ingo Molnar @ 2012-03-01 7:38 UTC (permalink / raw) To: Srivatsa S. Bhat Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > On 02/29/2012 02:47 PM, Ingo Molnar wrote: > > > > > * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > > > >> Hi Andrew, > >> > >> On 02/29/2012 02:57 AM, Andrew Morton wrote: > >> > >>> On Tue, 28 Feb 2012 09:43:59 +0100 > >>> Ingo Molnar <mingo@elte.hu> wrote: > >>> > >>>> This patch should also probably go upstream through the > >>>> locking/lockdep tree? Mind sending it us once you think it's > >>>> ready? > >>> > >>> Oh goody, that means you own > >>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. > >>> > >> > >> > >> That bug got fixed sometime around Dec 2011. See commit e30e2fdf > >> (VFS: Fix race between CPU hotplug and lglocks) > > > > The lglocks code is still CPU-hotplug racy AFAICS, despite the > > ->cpu_lock complication: > > > > Consider a taken global lock on a CPU: > > > > CPU#1 > > ... > > br_write_lock(vfsmount_lock); > > > > this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now > > CPU#3 comes online and takes the read lock: > > > CPU#3 cannot come online! :-) > > No new CPU can come online until that corresponding br_write_unlock() > is completed. That is because br_write_lock acquires &name##_cpu_lock > and only br_write_unlock will release it. Indeed, you are right. Note that ->cpu_lock is an entirely superfluous complication in br_write_lock(): the whole CPU hotplug race can be addressed by doing a br_write_lock()/unlock() barrier in the hotplug callback ... 
> > Another detail I noticed, this bit: > > > > register_hotcpu_notifier(&name##_lg_cpu_notifier); \ > > get_online_cpus(); \ > > for_each_online_cpu(i) \ > > cpu_set(i, name##_cpus); \ > > put_online_cpus(); \ > > > > could be something simpler and loop-less, like: > > > > get_online_cpus(); > > cpumask_copy(name##_cpus, cpu_online_mask); > > register_hotcpu_notifier(&name##_lg_cpu_notifier); > > put_online_cpus(); > > > > > While the cpumask_copy is definitely better, we can't put the > register_hotcpu_notifier() within get/put_online_cpus() > because it will lead to ABBA deadlock with a newly initiated > CPU Hotplug operation, the 2 locks involved being the > cpu_add_remove_lock and the cpu_hotplug lock. > > IOW, at the moment there is no "absolutely race-free way" way > to do CPU Hotplug callback registration. Some time ago, while > going through the asynchronous booting patch by Arjan [1] I > had written up a patch to fix that race because that race got > transformed from "purely theoretical" to "very real" with the > async boot patch, as shown by the powerpc boot failures [2]. > > But then I stopped short of posting that patch to the lists > because I started wondering how important that race would > actually turn out to be, in case the async booting design > takes a totally different approach altogether.. [And the > reason why I didn't post it is also because it would require > lots of changes in many parts where CPU Hotplug registration > is done, and that wouldn't probably be justified (I don't > know..) if the race remained only theoretical, as it is now.] A fairly simple solution would be to eliminate the _cpus mask as well, and do a for_each_possible_cpu() loop in the super-slow loop - like dozens and dozens of other places do it in the kernel. At a first quick glance that way the code gets a lot simpler and the only CPU hotplug related change needed are the CPU_* callbacks to do the lock barrier. 
Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-03-01 7:38 ` Ingo Molnar @ 2012-03-01 9:15 ` Srivatsa S. Bhat 2012-03-01 9:45 ` Ingo Molnar 0 siblings, 1 reply; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 9:15 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven, Paul E. McKenney, mc On 03/01/2012 01:08 PM, Ingo Molnar wrote: > > * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > >> On 02/29/2012 02:47 PM, Ingo Molnar wrote: >> >>> >>> * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: >>> >>>> Hi Andrew, >>>> >>>> On 02/29/2012 02:57 AM, Andrew Morton wrote: >>>> >>>>> On Tue, 28 Feb 2012 09:43:59 +0100 >>>>> Ingo Molnar <mingo@elte.hu> wrote: >>>>> >>>>>> This patch should also probably go upstream through the >>>>>> locking/lockdep tree? Mind sending it us once you think it's >>>>>> ready? >>>>> >>>>> Oh goody, that means you own >>>>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. >>>>> >>>> >>>> >>>> That bug got fixed sometime around Dec 2011. See commit e30e2fdf >>>> (VFS: Fix race between CPU hotplug and lglocks) >>> >>> The lglocks code is still CPU-hotplug racy AFAICS, despite the >>> ->cpu_lock complication: >>> >>> Consider a taken global lock on a CPU: >>> >>> CPU#1 >>> ... >>> br_write_lock(vfsmount_lock); >>> >>> this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now >>> CPU#3 comes online and takes the read lock: >> >> >> CPU#3 cannot come online! :-) >> >> No new CPU can come online until that corresponding br_write_unlock() >> is completed. That is because br_write_lock acquires &name##_cpu_lock >> and only br_write_unlock will release it. > > Indeed, you are right. 
> > Note that ->cpu_lock is an entirely superfluous complication in > br_write_lock(): the whole CPU hotplug race can be addressed by > doing a br_write_lock()/unlock() barrier in the hotplug callback I don't think I understood your point completely, but please see below... > ... > >>> Another detail I noticed, this bit: >>> >>> register_hotcpu_notifier(&name##_lg_cpu_notifier); \ >>> get_online_cpus(); \ >>> for_each_online_cpu(i) \ >>> cpu_set(i, name##_cpus); \ >>> put_online_cpus(); \ >>> >>> could be something simpler and loop-less, like: >>> >>> get_online_cpus(); >>> cpumask_copy(name##_cpus, cpu_online_mask); >>> register_hotcpu_notifier(&name##_lg_cpu_notifier); >>> put_online_cpus(); >>> >> >> >> While the cpumask_copy is definitely better, we can't put the >> register_hotcpu_notifier() within get/put_online_cpus() >> because it will lead to ABBA deadlock with a newly initiated >> CPU Hotplug operation, the 2 locks involved being the >> cpu_add_remove_lock and the cpu_hotplug lock. >> >> IOW, at the moment there is no "absolutely race-free way" way >> to do CPU Hotplug callback registration. Some time ago, while >> going through the asynchronous booting patch by Arjan [1] I >> had written up a patch to fix that race because that race got >> transformed from "purely theoretical" to "very real" with the >> async boot patch, as shown by the powerpc boot failures [2]. >> >> But then I stopped short of posting that patch to the lists >> because I started wondering how important that race would >> actually turn out to be, in case the async booting design >> takes a totally different approach altogether.. [And the >> reason why I didn't post it is also because it would require >> lots of changes in many parts where CPU Hotplug registration >> is done, and that wouldn't probably be justified (I don't >> know..) if the race remained only theoretical, as it is now.] 
> > A fairly simple solution would be to eliminate the _cpus mask as > well, and do a for_each_possible_cpu() loop in the super-slow > loop - like dozens and dozens of other places do it in the > kernel. > (I am assuming you are referring to the lglocks problem here, and not to the ABBA deadlock/racy registration etc discussed immediately above.) We wanted to avoid doing for_each_possible_cpu() to avoid the unnecessary performance hit. In fact, that was the very first solution proposed, by Cong Meng. See this: http://thread.gmane.org/gmane.linux.file-systems/59750/ http://thread.gmane.org/gmane.linux.file-systems/59750/focus=59751 So we developed a solution that avoids for_each_possible_cpu(), and yet works. Also, another point to be noted is that (referring to your previous mail actually), doing for_each_online_cpu() at CPU_UP_PREPARE time won't really work since the cpus are marked online only much later. So, the solution we chose was to keep a consistent _cpus mask throughout the lock-unlock sequence and perform the per-cpu lock/unlock only on the cpus in that cpu mask; and ensuring that that mask won't change in between... and also by delaying any new CPU online event during that time period using the new ->cpu_lock spinlock as I mentioned in the other mail. This (complexity) explains why the commit message of e30e2fdf looks more like a mathematical theorem ;-) > At a first quick glance that way the code gets a lot simpler and > the only CPU hotplug related change needed are the CPU_* > callbacks to do the lock barrier. > Regards, Srivatsa S. Bhat IBM Linux Technology Center ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-03-01 9:15 ` Srivatsa S. Bhat @ 2012-03-01 9:45 ` Ingo Molnar 2012-03-01 9:56 ` Srivatsa S. Bhat 0 siblings, 1 reply; 45+ messages in thread From: Ingo Molnar @ 2012-03-01 9:45 UTC (permalink / raw) To: Srivatsa S. Bhat Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven, Paul E. McKenney, mc * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > We wanted to avoid doing for_each_possible_cpu() to avoid the > unnecessary performance hit. [...] That was done at the cost of making the code rather complex. The thing is, *ANY* cpu-mask loop is an utter slowpath, so the "performance hit" is an overstatement. There's already dozens of of for_each_possible_cpu() loops in the kernel, and it's a perfectly acceptable solution in such cases. I suspect it does not matter much now as the code appears to be correct, but in general we want to opt for simpler designs for rare and fragile codepaths. Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-03-01 9:45 ` Ingo Molnar @ 2012-03-01 9:56 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 9:56 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven, Paul E. McKenney, mc On 03/01/2012 03:15 PM, Ingo Molnar wrote: > > * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: > >> We wanted to avoid doing for_each_possible_cpu() to avoid the >> unnecessary performance hit. [...] > > That was done at the cost of making the code rather complex. > Sadly, yes.. > The thing is, *ANY* cpu-mask loop is an utter slowpath, so the > "performance hit" is an overstatement. There's already dozens of > of for_each_possible_cpu() loops in the kernel, and it's a > perfectly acceptable solution in such cases. > > I suspect it does not matter much now as the code appears to be > correct, but in general we want to opt for simpler designs for > rare and fragile codepaths. > Ok, makes sense. And now looking back at the amount of complexity we built into this just to avoid the for_each_possible_cpu() loop, I wonder why we went to such lengths at all! (considering what you said above about any cpu-mask loop..) Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 11:12 ` Srivatsa S. Bhat 2012-03-01 7:38 ` Ingo Molnar @ 2012-03-01 8:12 ` Srivatsa S. Bhat 1 sibling, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:12 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven, Paul E. McKenney, Rafael J. Wysocki, ppc-dev, David S. Miller, Paul Gortmaker, Benjamin Herrenschmidt, KOSAKI Motohiro, sparclinux On 02/29/2012 04:42 PM, Srivatsa S. Bhat wrote: > On 02/29/2012 02:47 PM, Ingo Molnar wrote: > >> >> * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: >> >>> Hi Andrew, >>> >>> On 02/29/2012 02:57 AM, Andrew Morton wrote: >>> >>>> On Tue, 28 Feb 2012 09:43:59 +0100 >>>> Ingo Molnar <mingo@elte.hu> wrote: >>>> >>>>> This patch should also probably go upstream through the >>>>> locking/lockdep tree? Mind sending it us once you think it's >>>>> ready? >>>> >>>> Oh goody, that means you own >>>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. >>>> >>> >>> >>> That bug got fixed sometime around Dec 2011. See commit e30e2fdf >>> (VFS: Fix race between CPU hotplug and lglocks) >> >> The lglocks code is still CPU-hotplug racy AFAICS, despite the >> ->cpu_lock complication: >> >> Consider a taken global lock on a CPU: >> >> CPU#1 >> ... >> br_write_lock(vfsmount_lock); >> >> this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now >> CPU#3 comes online and takes the read lock: > > > CPU#3 cannot come online! :-) > > No new CPU can come online until that corresponding br_write_unlock() > is completed. That is because br_write_lock acquires &name##_cpu_lock > and only br_write_unlock will release it. > And, CPU_UP_PREPARE callback tries to acquire that very same spinlock, > and hence will keep spinning until br_write_unlock() is run. 
And hence, > the CPU#3 or any new CPU online for that matter will not complete until > br_write_unlock() is done. > > It is of course debatable as to how good this design really is, but IMHO, > the lglocks code is not CPU-hotplug racy now.. > > Here is the link to the original discussion during the development of > that patch: thread.gmane.org/gmane.linux.file-systems/59750/ > >> >> CPU#3 >> br_read_lock(vfsmount_lock); >> >> This will succeed while the br_write_lock() is still active, >> because CPU#1 has only taken the locks of CPU#1 and CPU#2. >> >> Crash! >> >> The proper fix would be for CPU-online to serialize with all >> known lglocks, via the notifier callback, i.e. to do something >> like this: >> >> case CPU_UP_PREPARE: >> for_each_online_cpu(cpu) { >> spin_lock(&name##_cpu_lock); >> spin_unlock(&name##_cpu_lock); >> } >> ... >> >> I.e. in essence do: >> >> case CPU_UP_PREPARE: >> name##_global_lock_online(); >> name##_global_unlock_online(); >> >> Another detail I noticed, this bit: >> >> register_hotcpu_notifier(&name##_lg_cpu_notifier); \ >> get_online_cpus(); \ >> for_each_online_cpu(i) \ >> cpu_set(i, name##_cpus); \ >> put_online_cpus(); \ >> >> could be something simpler and loop-less, like: >> >> get_online_cpus(); >> cpumask_copy(name##_cpus, cpu_online_mask); >> register_hotcpu_notifier(&name##_lg_cpu_notifier); >> put_online_cpus(); >> > > > While the cpumask_copy is definitely better, we can't put the > register_hotcpu_notifier() within get/put_online_cpus() because it will > lead to ABBA deadlock with a newly initiated CPU Hotplug operation, the > 2 locks involved being the cpu_add_remove_lock and the cpu_hotplug lock. > > IOW, at the moment there is no "absolutely race-free way" way to do > CPU Hotplug callback registration. 
Some time ago, while going through the > asynchronous booting patch by Arjan [1] I had written up a patch to fix > that race because that race got transformed from "purely theoretical" > to "very real" with the async boot patch, as shown by the powerpc boot > failures [2]. > > But then I stopped short of posting that patch to the lists because I > started wondering how important that race would actually turn out to be, > in case the async booting design takes a totally different approach > altogether.. [And the reason why I didn't post it is also because it > would require lots of changes in many parts where CPU Hotplug registration > is done, and that wouldn't probably be justified (I don't know..) if the > race remained only theoretical, as it is now.] > > [1]. http://thread.gmane.org/gmane.linux.kernel/1246209 > [2]. https://lkml.org/lkml/2012/2/13/383 > Ok, now that I mentioned about my patch, let me as well show it some daylight.. It is totally untested, incomplete and probably won't even compile.. (given that I had abandoned working on it some time ago, since I was not sure in what direction the async boot design was headed, which was the original motivation for me to try to fix this race) I really hate to post it when it is in such a state, but atleast let me get the idea out, now that the discussion is around it, atleast just to get some thoughts about whether it is even worth pursuing! (I'll post the patches as a reply to this mail.) By the way, it should solve the powerpc boot failure, atleast in principle, considering what the root cause of the failure was.. Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. @ 2012-03-01 8:12 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:24 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, linux-fsdevel, Peter Zijlstra, Arjan van de Ven, Paul E. McKenney, Rafael J. Wysocki, ppc-dev, David S. Miller, Paul Gortmaker, Benjamin Herrenschmidt, KOSAKI Motohiro, sparclinux On 02/29/2012 04:42 PM, Srivatsa S. Bhat wrote: > On 02/29/2012 02:47 PM, Ingo Molnar wrote: > >> >> * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: >> >>> Hi Andrew, >>> >>> On 02/29/2012 02:57 AM, Andrew Morton wrote: >>> >>>> On Tue, 28 Feb 2012 09:43:59 +0100 >>>> Ingo Molnar <mingo@elte.hu> wrote: >>>> >>>>> This patch should also probably go upstream through the >>>>> locking/lockdep tree? Mind sending it us once you think it's >>>>> ready? >>>> >>>> Oh goody, that means you own >>>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. >>>> >>> >>> >>> That bug got fixed sometime around Dec 2011. See commit e30e2fdf >>> (VFS: Fix race between CPU hotplug and lglocks) >> >> The lglocks code is still CPU-hotplug racy AFAICS, despite the >> ->cpu_lock complication: >> >> Consider a taken global lock on a CPU: >> >> CPU#1 >> ... >> br_write_lock(vfsmount_lock); >> >> this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now >> CPU#3 comes online and takes the read lock: > > > CPU#3 cannot come online! :-) > > No new CPU can come online until that corresponding br_write_unlock() > is completed. That is because br_write_lock acquires &name##_cpu_lock > and only br_write_unlock will release it. > And, CPU_UP_PREPARE callback tries to acquire that very same spinlock, > and hence will keep spinning until br_write_unlock() is run. And hence, > the CPU#3 or any new CPU online for that matter will not complete until > br_write_unlock() is done. 
> > It is of course debatable as to how good this design really is, but IMHO, > the lglocks code is not CPU-hotplug racy now.. > > Here is the link to the original discussion during the development of > that patch: thread.gmane.org/gmane.linux.file-systems/59750/ > >> >> CPU#3 >> br_read_lock(vfsmount_lock); >> >> This will succeed while the br_write_lock() is still active, >> because CPU#1 has only taken the locks of CPU#1 and CPU#2. >> >> Crash! >> >> The proper fix would be for CPU-online to serialize with all >> known lglocks, via the notifier callback, i.e. to do something >> like this: >> >> case CPU_UP_PREPARE: >> for_each_online_cpu(cpu) { >> spin_lock(&name##_cpu_lock); >> spin_unlock(&name##_cpu_lock); >> } >> ... >> >> I.e. in essence do: >> >> case CPU_UP_PREPARE: >> name##_global_lock_online(); >> name##_global_unlock_online(); >> >> Another detail I noticed, this bit: >> >> register_hotcpu_notifier(&name##_lg_cpu_notifier); \ >> get_online_cpus(); \ >> for_each_online_cpu(i) \ >> cpu_set(i, name##_cpus); \ >> put_online_cpus(); \ >> >> could be something simpler and loop-less, like: >> >> get_online_cpus(); >> cpumask_copy(name##_cpus, cpu_online_mask); >> register_hotcpu_notifier(&name##_lg_cpu_notifier); >> put_online_cpus(); >> > > > While the cpumask_copy is definitely better, we can't put the > register_hotcpu_notifier() within get/put_online_cpus() because it will > lead to ABBA deadlock with a newly initiated CPU Hotplug operation, the > 2 locks involved being the cpu_add_remove_lock and the cpu_hotplug lock. > > IOW, at the moment there is no "absolutely race-free way" way to do > CPU Hotplug callback registration. Some time ago, while going through the > asynchronous booting patch by Arjan [1] I had written up a patch to fix > that race because that race got transformed from "purely theoretical" > to "very real" with the async boot patch, as shown by the powerpc boot > failures [2]. 
> > But then I stopped short of posting that patch to the lists because I > started wondering how important that race would actually turn out to be, > in case the async booting design takes a totally different approach > altogether.. [And the reason why I didn't post it is also because it > would require lots of changes in many parts where CPU Hotplug registration > is done, and that wouldn't probably be justified (I don't know..) if the > race remained only theoretical, as it is now.] > > [1]. http://thread.gmane.org/gmane.linux.kernel/1246209 > [2]. https://lkml.org/lkml/2012/2/13/383 > Ok, now that I mentioned about my patch, let me as well show it some daylight.. It is totally untested, incomplete and probably won't even compile.. (given that I had abandoned working on it some time ago, since I was not sure in what direction the async boot design was headed, which was the original motivation for me to try to fix this race) I really hate to post it when it is in such a state, but atleast let me get the idea out, now that the discussion is around it, atleast just to get some thoughts about whether it is even worth pursuing! (I'll post the patches as a reply to this mail.) By the way, it should solve the powerpc boot failure, atleast in principle, considering what the root cause of the failure was.. Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. @ 2012-03-01 8:12 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:12 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra On 02/29/2012 04:42 PM, Srivatsa S. Bhat wrote: > On 02/29/2012 02:47 PM, Ingo Molnar wrote: > >> >> * Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> wrote: >> >>> Hi Andrew, >>> >>> On 02/29/2012 02:57 AM, Andrew Morton wrote: >>> >>>> On Tue, 28 Feb 2012 09:43:59 +0100 >>>> Ingo Molnar <mingo@elte.hu> wrote: >>>> >>>>> This patch should also probably go upstream through the >>>>> locking/lockdep tree? Mind sending it us once you think it's >>>>> ready? >>>> >>>> Oh goody, that means you own >>>> http://marc.info/?l=linux-kernel&m=131419353511653&w=2. >>>> >>> >>> >>> That bug got fixed sometime around Dec 2011. See commit e30e2fdf >>> (VFS: Fix race between CPU hotplug and lglocks) >> >> The lglocks code is still CPU-hotplug racy AFAICS, despite the >> ->cpu_lock complication: >> >> Consider a taken global lock on a CPU: >> >> CPU#1 >> ... >> br_write_lock(vfsmount_lock); >> >> this takes the lock of all online CPUs: say CPU#1 and CPU#2. Now >> CPU#3 comes online and takes the read lock: > > > CPU#3 cannot come online! :-) > > No new CPU can come online until that corresponding br_write_unlock() > is completed. That is because br_write_lock acquires &name##_cpu_lock > and only br_write_unlock will release it. > And, CPU_UP_PREPARE callback tries to acquire that very same spinlock, > and hence will keep spinning until br_write_unlock() is run. And hence, > the CPU#3 or any new CPU online for that matter will not complete until > br_write_unlock() is done. 
> > It is of course debatable as to how good this design really is, but IMHO, > the lglocks code is not CPU-hotplug racy now.. > > Here is the link to the original discussion during the development of > that patch: thread.gmane.org/gmane.linux.file-systems/59750/ > >> >> CPU#3 >> br_read_lock(vfsmount_lock); >> >> This will succeed while the br_write_lock() is still active, >> because CPU#1 has only taken the locks of CPU#1 and CPU#2. >> >> Crash! >> >> The proper fix would be for CPU-online to serialize with all >> known lglocks, via the notifier callback, i.e. to do something >> like this: >> >> case CPU_UP_PREPARE: >> for_each_online_cpu(cpu) { >> spin_lock(&name##_cpu_lock); >> spin_unlock(&name##_cpu_lock); >> } >> ... >> >> I.e. in essence do: >> >> case CPU_UP_PREPARE: >> name##_global_lock_online(); >> name##_global_unlock_online(); >> >> Another detail I noticed, this bit: >> >> register_hotcpu_notifier(&name##_lg_cpu_notifier); \ >> get_online_cpus(); \ >> for_each_online_cpu(i) \ >> cpu_set(i, name##_cpus); \ >> put_online_cpus(); \ >> >> could be something simpler and loop-less, like: >> >> get_online_cpus(); >> cpumask_copy(name##_cpus, cpu_online_mask); >> register_hotcpu_notifier(&name##_lg_cpu_notifier); >> put_online_cpus(); >> > > > While the cpumask_copy is definitely better, we can't put the > register_hotcpu_notifier() within get/put_online_cpus() because it will > lead to ABBA deadlock with a newly initiated CPU Hotplug operation, the > 2 locks involved being the cpu_add_remove_lock and the cpu_hotplug lock. > > IOW, at the moment there is no "absolutely race-free way" way to do > CPU Hotplug callback registration. Some time ago, while going through the > asynchronous booting patch by Arjan [1] I had written up a patch to fix > that race because that race got transformed from "purely theoretical" > to "very real" with the async boot patch, as shown by the powerpc boot > failures [2]. 
> > But then I stopped short of posting that patch to the lists because I > started wondering how important that race would actually turn out to be, > in case the async booting design takes a totally different approach > altogether.. [And the reason why I didn't post it is also because it > would require lots of changes in many parts where CPU Hotplug registration > is done, and that wouldn't probably be justified (I don't know..) if the > race remained only theoretical, as it is now.] > > [1]. http://thread.gmane.org/gmane.linux.kernel/1246209 > [2]. https://lkml.org/lkml/2012/2/13/383 > Ok, now that I mentioned about my patch, let me as well show it some daylight.. It is totally untested, incomplete and probably won't even compile.. (given that I had abandoned working on it some time ago, since I was not sure in what direction the async boot design was headed, which was the original motivation for me to try to fix this race) I really hate to post it when it is in such a state, but atleast let me get the idea out, now that the discussion is around it, atleast just to get some thoughts about whether it is even worth pursuing! (I'll post the patches as a reply to this mail.) By the way, it should solve the powerpc boot failure, atleast in principle, considering what the root cause of the failure was.. Regards, Srivatsa S. Bhat ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 1/3] CPU hotplug: Fix issues with callback registration 2012-03-01 8:12 ` Srivatsa S. Bhat (?) @ 2012-03-01 8:15 ` Srivatsa S. Bhat -1 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:15 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Currently, there are several intertwined problems with CPU hotplug callback registration: Code which needs to get notified of CPU hotplug events and additionally wants to do something for each already online CPU, would typically do something like: register_cpu_notifier(&foobar_cpu_notifier); <============ "A" get_online_cpus(); for_each_online_cpu(cpu) { /* Do something */ } put_online_cpus(); At the point marked as "A", a CPU hotplug event could sneak in, leaving the code confused. Moving the registration to after put_online_cpus() won't help either, because we could be losing a CPU hotplug event between put_online_cpus() and the callback registration. Also, doing the registration inside the get/put_online_cpus() block is also not going to help, because it will lead to ABBA deadlock with CPU hotplug, the 2 locks being cpu_add_remove_lock and cpu_hotplug lock. It is also to be noted that, at times, we might want to do different setups or initializations depending on whether a CPU is coming online for the first time (as part of booting) or whether it is being only soft-onlined at a later point in time. To achieve this, doing something like the code shown above, with the "Do something" being different than what the registered callback does wouldn't work out, because of the race conditions mentioned above. 
The solution to all this is to include "history replay upon request" within the CPU hotplug callback registration code, while also providing an option for a different callback to be invoked while replaying history. Though the above mentioned race condition was mostly theoretical before, it gets all real when things like asynchronous booting[1] come into the picture, as shown by the PowerPC boot failure in [2]. So this fix is also a step forward in getting cool things like asynchronous booting to work properly. References: [1]. https://lkml.org/lkml/2012/2/14/62 --- include/linux/cpu.h | 15 +++++++++++++++ kernel/cpu.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6e53b48..90a6d76 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -124,16 +124,25 @@ enum { #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); extern void unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) @@ -155,6 +164,12 @@ static inline int register_cpu_notifier(struct notifier_block *nb) return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} + static inline void 
unregister_cpu_notifier(struct notifier_block *nb) { } diff --git a/kernel/cpu.c b/kernel/cpu.c index d520d34..1564c1d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -132,12 +132,56 @@ static void cpu_hotplug_done(void) {} /* Need to know about CPUs going up/down? */ int __ref register_cpu_notifier(struct notifier_block *nb) { - int ret; + return register_allcpu_notifier(nb, false, NULL); +} +EXPORT_SYMBOL(register_cpu_notifier); + +int __ref register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + int cpu, ret = 0; + + if (!replay_history && history_setup) + return -EINVAL; + cpu_maps_update_begin(); - ret = raw_notifier_chain_register(&cpu_chain, nb); + /* + * We don't race with CPU hotplug, because we just took the + * cpu_add_remove_lock. + */ + + if (!replay_history) + goto Register; + + if (history_setup) { + /* + * The caller has a special setup routine to rewrite + * history as he desires. Just invoke it. Don't + * proceed with callback registration if this setup is + * unsuccessful. + */ + ret = history_setup(); + } else { + /* + * Fallback to the usual callback, if a special handler + * for past CPU hotplug events is not specified. + * In this case, we will replay only past CPU bring-up + * events. + */ + for_each_online_cpu(cpu) { + nb->notifier_call(nb, CPU_UP_PREPARE, cpu); + nb->notifier_call(nb, CPU_ONLINE, cpu); + } + } + + Register: + if (!ret) + ret = raw_notifier_chain_register(&cpu_chain, nb); + cpu_maps_update_done(); return ret; } +EXPORT_SYMBOL(register_allcpu_notifier); static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) @@ -161,7 +205,6 @@ static void cpu_notify_nofail(unsigned long val, void *v) { BUG_ON(cpu_notify(val, v)); } -EXPORT_SYMBOL(register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 1/3] CPU hotplug: Fix issues with callback registration @ 2012-03-01 8:15 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:27 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Currently, there are several intertwined problems with CPU hotplug callback registration: Code which needs to get notified of CPU hotplug events and additionally wants to do something for each already online CPU, would typically do something like: register_cpu_notifier(&foobar_cpu_notifier); <====== "A" get_online_cpus(); for_each_online_cpu(cpu) { /* Do something */ } put_online_cpus(); At the point marked as "A", a CPU hotplug event could sneak in, leaving the code confused. Moving the registration to after put_online_cpus() won't help either, because we could be losing a CPU hotplug event between put_online_cpus() and the callback registration. Also, doing the registration inside the get/put_online_cpus() block is also not going to help, because it will lead to ABBA deadlock with CPU hotplug, the 2 locks being cpu_add_remove_lock and cpu_hotplug lock. It is also to be noted that, at times, we might want to do different setups or initializations depending on whether a CPU is coming online for the first time (as part of booting) or whether it is being only soft-onlined at a later point in time. To achieve this, doing something like the code shown above, with the "Do something" being different than what the registered callback does wouldn't work out, because of the race conditions mentioned above. 
The solution to all this is to include "history replay upon request" within the CPU hotplug callback registration code, while also providing an option for a different callback to be invoked while replaying history. Though the above mentioned race condition was mostly theoretical before, it gets all real when things like asynchronous booting[1] come into the picture, as shown by the PowerPC boot failure in [2]. So this fix is also a step forward in getting cool things like asynchronous booting to work properly. References: [1]. https://lkml.org/lkml/2012/2/14/62 --- include/linux/cpu.h | 15 +++++++++++++++ kernel/cpu.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6e53b48..90a6d76 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -124,16 +124,25 @@ enum { #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); extern void unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) @@ -155,6 +164,12 @@ static inline int register_cpu_notifier(struct notifier_block *nb) return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} + static inline void 
unregister_cpu_notifier(struct notifier_block *nb) { } diff --git a/kernel/cpu.c b/kernel/cpu.c index d520d34..1564c1d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -132,12 +132,56 @@ static void cpu_hotplug_done(void) {} /* Need to know about CPUs going up/down? */ int __ref register_cpu_notifier(struct notifier_block *nb) { - int ret; + return register_allcpu_notifier(nb, false, NULL); +} +EXPORT_SYMBOL(register_cpu_notifier); + +int __ref register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + int cpu, ret = 0; + + if (!replay_history && history_setup) + return -EINVAL; + cpu_maps_update_begin(); - ret = raw_notifier_chain_register(&cpu_chain, nb); + /* + * We don't race with CPU hotplug, because we just took the + * cpu_add_remove_lock. + */ + + if (!replay_history) + goto Register; + + if (history_setup) { + /* + * The caller has a special setup routine to rewrite + * history as he desires. Just invoke it. Don't + * proceed with callback registration if this setup is + * unsuccessful. + */ + ret = history_setup(); + } else { + /* + * Fallback to the usual callback, if a special handler + * for past CPU hotplug events is not specified. + * In this case, we will replay only past CPU bring-up + * events. + */ + for_each_online_cpu(cpu) { + nb->notifier_call(nb, CPU_UP_PREPARE, cpu); + nb->notifier_call(nb, CPU_ONLINE, cpu); + } + } + + Register: + if (!ret) + ret = raw_notifier_chain_register(&cpu_chain, nb); + cpu_maps_update_done(); return ret; } +EXPORT_SYMBOL(register_allcpu_notifier); static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) @@ -161,7 +205,6 @@ static void cpu_notify_nofail(unsigned long val, void *v) { BUG_ON(cpu_notify(val, v)); } -EXPORT_SYMBOL(register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 1/3] CPU hotplug: Fix issues with callback registration @ 2012-03-01 8:15 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:15 UTC (permalink / raw) To: Ingo Molnar Cc: Andi Kleen, Nick Piggin, Paul E. McKenney, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, KOSAKI Motohiro, sparclinux, linux-fsdevel, Andrew Morton, Arjan van de Ven, ppc-dev, David S. Miller, Peter Zijlstra Currently, there are several intertwined problems with CPU hotplug callback registration: Code which needs to get notified of CPU hotplug events and additionally wants to do something for each already online CPU, would typically do something like: register_cpu_notifier(&foobar_cpu_notifier); <============ "A" get_online_cpus(); for_each_online_cpu(cpu) { /* Do something */ } put_online_cpus(); At the point marked as "A", a CPU hotplug event could sneak in, leaving the code confused. Moving the registration to after put_online_cpus() won't help either, because we could be losing a CPU hotplug event between put_online_cpus() and the callback registration. Also, doing the registration inside the get/put_online_cpus() block is also not going to help, because it will lead to ABBA deadlock with CPU hotplug, the 2 locks being cpu_add_remove_lock and cpu_hotplug lock. It is also to be noted that, at times, we might want to do different setups or initializations depending on whether a CPU is coming online for the first time (as part of booting) or whether it is being only soft-onlined at a later point in time. To achieve this, doing something like the code shown above, with the "Do something" being different than what the registered callback does wouldn't work out, because of the race conditions mentioned above. 
The solution to all this is to include "history replay upon request" within the CPU hotplug callback registration code, while also providing an option for a different callback to be invoked while replaying history. Though the above mentioned race condition was mostly theoretical before, it gets all real when things like asynchronous booting[1] come into the picture, as shown by the PowerPC boot failure in [2]. So this fix is also a step forward in getting cool things like asynchronous booting to work properly. References: [1]. https://lkml.org/lkml/2012/2/14/62 --- include/linux/cpu.h | 15 +++++++++++++++ kernel/cpu.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6e53b48..90a6d76 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -124,16 +124,25 @@ enum { #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); extern void unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) @@ -155,6 +164,12 @@ static inline int register_cpu_notifier(struct notifier_block *nb) return 0; } +static inline int register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + return 0; +} + static inline void 
unregister_cpu_notifier(struct notifier_block *nb) { } diff --git a/kernel/cpu.c b/kernel/cpu.c index d520d34..1564c1d 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -132,12 +132,56 @@ static void cpu_hotplug_done(void) {} /* Need to know about CPUs going up/down? */ int __ref register_cpu_notifier(struct notifier_block *nb) { - int ret; + return register_allcpu_notifier(nb, false, NULL); +} +EXPORT_SYMBOL(register_cpu_notifier); + +int __ref register_allcpu_notifier(struct notifier_block *nb, + bool replay_history, int (*history_setup)(void)) +{ + int cpu, ret = 0; + + if (!replay_history && history_setup) + return -EINVAL; + cpu_maps_update_begin(); - ret = raw_notifier_chain_register(&cpu_chain, nb); + /* + * We don't race with CPU hotplug, because we just took the + * cpu_add_remove_lock. + */ + + if (!replay_history) + goto Register; + + if (history_setup) { + /* + * The caller has a special setup routine to rewrite + * history as he desires. Just invoke it. Don't + * proceed with callback registration if this setup is + * unsuccessful. + */ + ret = history_setup(); + } else { + /* + * Fallback to the usual callback, if a special handler + * for past CPU hotplug events is not specified. + * In this case, we will replay only past CPU bring-up + * events. + */ + for_each_online_cpu(cpu) { + nb->notifier_call(nb, CPU_UP_PREPARE, cpu); + nb->notifier_call(nb, CPU_ONLINE, cpu); + } + } + + Register: + if (!ret) + ret = raw_notifier_chain_register(&cpu_chain, nb); + cpu_maps_update_done(); return ret; } +EXPORT_SYMBOL(register_allcpu_notifier); static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) @@ -161,7 +205,6 @@ static void cpu_notify_nofail(unsigned long val, void *v) { BUG_ON(cpu_notify(val, v)); } -EXPORT_SYMBOL(register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 2/3] CPU hotplug, arch/powerpc: Fix CPU hotplug callback registration 2012-03-01 8:12 ` Srivatsa S. Bhat (?) @ 2012-03-01 8:16 ` Srivatsa S. Bhat -1 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:16 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/powerpc/kernel/sysfs.c | 44 +++++++++++++++++++++++++++++++++++-------- arch/powerpc/mm/numa.c | 11 ++++++++--- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 883e74c..5838b33 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -496,6 +496,38 @@ ssize_t arch_cpu_release(const char *buf, size_t count) #endif /* CONFIG_HOTPLUG_CPU */ +static void cpu_register_helper(struct cpu *c, int cpu) +{ + register_cpu(c, cpu); + device_create_file(&c->dev, &dev_attr_physical_id); +} + +static int __cpuinit sysfs_cpu_notify_first_time(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (action == CPU_ONLINE) + if (!c->hotpluggable) /* Avoid duplicate registrations */ + cpu_register_helper(c, cpu); + register_cpu_online(cpu); + } + return NOTIFY_OK; +} +static int __cpuinit sysfs_cpu_notify_setup(void) +{ + int cpu; + + /* + * We don't race with CPU hotplug because we are called from + * the CPU hotplug callback registration function. 
+ */ + for_each_online_cpu(cpu) + sysfs_cpu_notify_first_time(NULL, CPU_ONLINE, cpu); + + return 0; +} static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -637,7 +669,6 @@ static int __init topology_init(void) int cpu; register_nodes(); - register_cpu_notifier(&sysfs_cpu_nb); for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -652,15 +683,12 @@ static int __init topology_init(void) if (ppc_md.cpu_die) c->hotpluggable = 1; - if (cpu_online(cpu) || c->hotpluggable) { - register_cpu(c, cpu); + if (c->hotpluggable) + cpu_register_helper(c, cpu); + } - device_create_file(&c->dev, &dev_attr_physical_id); - } + register_allcpu_notifier(&sysfs_cpu_nb, true, &sysfs_cpu_notify_setup); - if (cpu_online(cpu)) - register_cpu_online(cpu); - } #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3feefc3..e326455 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1014,6 +1014,13 @@ static void __init mark_reserved_regions_for_nid(int nid) } } +static int __cpuinit cpu_numa_callback_setup(void) +{ + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); + return 0; +} + void __init do_init_bootmem(void) { @@ -1088,9 +1095,7 @@ void __init do_init_bootmem(void) */ setup_node_to_cpumask_map(); - register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); + register_allcpu_notifier(&ppc64_numa_nb, true, &cpu_numa_callback_setup); } void __init paging_init(void) ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 2/3] CPU hotplug, arch/powerpc: Fix CPU hotplug callback registration @ 2012-03-01 8:16 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:28 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/powerpc/kernel/sysfs.c | 44 +++++++++++++++++++++++++++++++++++-------- arch/powerpc/mm/numa.c | 11 ++++++++--- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 883e74c..5838b33 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -496,6 +496,38 @@ ssize_t arch_cpu_release(const char *buf, size_t count) #endif /* CONFIG_HOTPLUG_CPU */ +static void cpu_register_helper(struct cpu *c, int cpu) +{ + register_cpu(c, cpu); + device_create_file(&c->dev, &dev_attr_physical_id); +} + +static int __cpuinit sysfs_cpu_notify_first_time(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (action == CPU_ONLINE) + if (!c->hotpluggable) /* Avoid duplicate registrations */ + cpu_register_helper(c, cpu); + register_cpu_online(cpu); + } + return NOTIFY_OK; +} +static int __cpuinit sysfs_cpu_notify_setup(void) +{ + int cpu; + + /* + * We don't race with CPU hotplug because we are called from + * the CPU hotplug callback registration function. 
+ */ + for_each_online_cpu(cpu) + sysfs_cpu_notify_first_time(NULL, CPU_ONLINE, cpu); + + return 0; +} static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -637,7 +669,6 @@ static int __init topology_init(void) int cpu; register_nodes(); - register_cpu_notifier(&sysfs_cpu_nb); for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -652,15 +683,12 @@ static int __init topology_init(void) if (ppc_md.cpu_die) c->hotpluggable = 1; - if (cpu_online(cpu) || c->hotpluggable) { - register_cpu(c, cpu); + if (c->hotpluggable) + cpu_register_helper(c, cpu); + } - device_create_file(&c->dev, &dev_attr_physical_id); - } + register_allcpu_notifier(&sysfs_cpu_nb, true, &sysfs_cpu_notify_setup); - if (cpu_online(cpu)) - register_cpu_online(cpu); - } #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3feefc3..e326455 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1014,6 +1014,13 @@ static void __init mark_reserved_regions_for_nid(int nid) } } +static int __cpuinit cpu_numa_callback_setup(void) +{ + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); + return 0; +} + void __init do_init_bootmem(void) { @@ -1088,9 +1095,7 @@ void __init do_init_bootmem(void) */ setup_node_to_cpumask_map(); - register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); + register_allcpu_notifier(&ppc64_numa_nb, true, &cpu_numa_callback_setup); } void __init paging_init(void) ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 2/3] CPU hotplug, arch/powerpc: Fix CPU hotplug callback registration @ 2012-03-01 8:16 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:16 UTC (permalink / raw) To: Ingo Molnar Cc: Andi Kleen, Nick Piggin, Paul E. McKenney, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, KOSAKI Motohiro, sparclinux, linux-fsdevel, Andrew Morton, Arjan van de Ven, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/powerpc/kernel/sysfs.c | 44 +++++++++++++++++++++++++++++++++++-------- arch/powerpc/mm/numa.c | 11 ++++++++--- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 883e74c..5838b33 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -496,6 +496,38 @@ ssize_t arch_cpu_release(const char *buf, size_t count) #endif /* CONFIG_HOTPLUG_CPU */ +static void cpu_register_helper(struct cpu *c, int cpu) +{ + register_cpu(c, cpu); + device_create_file(&c->dev, &dev_attr_physical_id); +} + +static int __cpuinit sysfs_cpu_notify_first_time(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)(long)hcpu; + struct cpu *c = &per_cpu(cpu_devices, cpu); + + if (action == CPU_ONLINE) + if (!c->hotpluggable) /* Avoid duplicate registrations */ + cpu_register_helper(c, cpu); + register_cpu_online(cpu); + } + return NOTIFY_OK; +} +static int __cpuinit sysfs_cpu_notify_setup(void) +{ + int cpu; + + /* + * We don't race with CPU hotplug because we are called from + * the CPU hotplug callback registration function. 
+ */ + for_each_online_cpu(cpu) + sysfs_cpu_notify_first_time(NULL, CPU_ONLINE, cpu); + + return 0; +} static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -637,7 +669,6 @@ static int __init topology_init(void) int cpu; register_nodes(); - register_cpu_notifier(&sysfs_cpu_nb); for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); @@ -652,15 +683,12 @@ static int __init topology_init(void) if (ppc_md.cpu_die) c->hotpluggable = 1; - if (cpu_online(cpu) || c->hotpluggable) { - register_cpu(c, cpu); + if (c->hotpluggable) + cpu_register_helper(c, cpu); + } - device_create_file(&c->dev, &dev_attr_physical_id); - } + register_allcpu_notifier(&sysfs_cpu_nb, true, &sysfs_cpu_notify_setup); - if (cpu_online(cpu)) - register_cpu_online(cpu); - } #ifdef CONFIG_PPC64 sysfs_create_dscr_default(); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3feefc3..e326455 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1014,6 +1014,13 @@ static void __init mark_reserved_regions_for_nid(int nid) } } +static int __cpuinit cpu_numa_callback_setup(void) +{ + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); + return 0; +} + void __init do_init_bootmem(void) { @@ -1088,9 +1095,7 @@ void __init do_init_bootmem(void) */ setup_node_to_cpumask_map(); - register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); + register_allcpu_notifier(&ppc64_numa_nb, true, &cpu_numa_callback_setup); } void __init paging_init(void) ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 3/3] CPU hotplug, arch/sparc: Fix CPU hotplug callback registration 2012-03-01 8:12 ` Srivatsa S. Bhat (?) @ 2012-03-01 8:18 ` Srivatsa S. Bhat -1 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:18 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/sparc/kernel/sysfs.c | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 654e8aa..22cb881 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -300,16 +300,14 @@ static int __init topology_init(void) check_mmu_stats(); - register_cpu_notifier(&sysfs_cpu_nb); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); register_cpu(c, cpu); - if (cpu_online(cpu)) - register_cpu_online(cpu); } + register_allcpu_notifier(&sysfs_cpu_nb, true, NULL); + return 0; } ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 3/3] CPU hotplug, arch/sparc: Fix CPU hotplug callback registration @ 2012-03-01 8:18 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:30 UTC (permalink / raw) To: Ingo Molnar Cc: sparclinux, Andi Kleen, Nick Piggin, KOSAKI Motohiro, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, Arjan van de Ven, linux-fsdevel, Andrew Morton, Paul E. McKenney, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/sparc/kernel/sysfs.c | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 654e8aa..22cb881 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -300,16 +300,14 @@ static int __init topology_init(void) check_mmu_stats(); - register_cpu_notifier(&sysfs_cpu_nb); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); register_cpu(c, cpu); - if (cpu_online(cpu)) - register_cpu_online(cpu); } + register_allcpu_notifier(&sysfs_cpu_nb, true, NULL); + return 0; } ^ permalink raw reply related [flat|nested] 45+ messages in thread
* [PATCH 3/3] CPU hotplug, arch/sparc: Fix CPU hotplug callback registration @ 2012-03-01 8:18 ` Srivatsa S. Bhat 0 siblings, 0 replies; 45+ messages in thread From: Srivatsa S. Bhat @ 2012-03-01 8:18 UTC (permalink / raw) To: Ingo Molnar Cc: Andi Kleen, Nick Piggin, Paul E. McKenney, Rusty Russell, linux-kernel, Rafael J. Wysocki, Paul Gortmaker, Alexander Viro, KOSAKI Motohiro, sparclinux, linux-fsdevel, Andrew Morton, Arjan van de Ven, ppc-dev, David S. Miller, Peter Zijlstra Restructure CPU hotplug setup and callback registration in topology_init so as to be race-free. --- arch/sparc/kernel/sysfs.c | 6 ++---- 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 654e8aa..22cb881 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -300,16 +300,14 @@ static int __init topology_init(void) check_mmu_stats(); - register_cpu_notifier(&sysfs_cpu_nb); - for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); register_cpu(c, cpu); - if (cpu_online(cpu)) - register_cpu_online(cpu); } + register_allcpu_notifier(&sysfs_cpu_nb, true, NULL); + return 0; } ^ permalink raw reply related [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 21:27 ` Andrew Morton 2012-02-29 5:44 ` Srivatsa S. Bhat @ 2012-02-29 8:29 ` Ingo Molnar 2012-02-29 8:58 ` Peter Zijlstra 1 sibling, 1 reply; 45+ messages in thread From: Ingo Molnar @ 2012-02-29 8:29 UTC (permalink / raw) To: Andrew Morton Cc: Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel, Peter Zijlstra * Andrew Morton <akpm@linux-foundation.org> wrote: > On Tue, 28 Feb 2012 09:43:59 +0100 > Ingo Molnar <mingo@elte.hu> wrote: > > > This patch should also probably go upstream through the > > locking/lockdep tree? Mind sending it us once you think it's > > ready? > > Oh goody, that means you own > http://marc.info/?l=linux-kernel&m=131419353511653&w=2. > > I do think the brlock code is sick, but Nick has turned into > an ex-Nick and nobody works on it. The main problem, highlighted by the above soft lockup report, is that lglocks and brlocks go outside the regular spinlock facilities and thus avoid quite a bit of generic lock debugging for no good reason. Those patches should never have gone upstream in their incomplete form via the VFS tree. As part of any cleanup they should first be converted from arch_spinlock_t to regular spinlock_t - I bet if that is done then that not only simplifies the wrappers massively, it also turns the above soft lockup report into a nice, actionable lockdep splat. Andi's deficient patch does not address that main shortcoming of brlocks/lglocks, it just addresses a symptom - while introducing a handful of new symptoms of suckage. 
I'll review and process resubmitted patches from Andi if he wants to submit a proper, complete series - but there's a quality threshold and in this case I'd rather keep 1970's preprocessor code that sucks very visibly and possibly attracts someone with a clue than have a sloppy patch hiding the deeper design problems done by a clueless person who is also openly hostile towards the concept of producing quality code. Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 8:29 ` [PATCH] cpumask: fix lg_lock/br_lock Ingo Molnar @ 2012-02-29 8:58 ` Peter Zijlstra 2012-02-29 9:32 ` Ingo Molnar 0 siblings, 1 reply; 45+ messages in thread From: Peter Zijlstra @ 2012-02-29 8:58 UTC (permalink / raw) To: Ingo Molnar Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel On Wed, 2012-02-29 at 09:29 +0100, Ingo Molnar wrote: > As part of any cleanup they should first be converted from > arch_spinlock_t to regular spinlock_t - I bet if that is done > then that not only simplifies the wrappers massively, it also > turns the above soft lockup report into a nice, actionable > lockdep splat. It might help if you'd actually read the code.. that will simply not work. ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-29 8:58 ` Peter Zijlstra @ 2012-02-29 9:32 ` Ingo Molnar 0 siblings, 0 replies; 45+ messages in thread From: Ingo Molnar @ 2012-02-29 9:32 UTC (permalink / raw) To: Peter Zijlstra Cc: Andrew Morton, Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Andi Kleen, Srivatsa S. Bhat, linux-fsdevel * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote: > On Wed, 2012-02-29 at 09:29 +0100, Ingo Molnar wrote: > > > As part of any cleanup they should first be converted from > > arch_spinlock_t to regular spinlock_t - I bet if that is > > done then that not only simplifies the wrappers massively, > > it also turns the above soft lockup report into a nice, > > actionable lockdep splat. > > It might help if you'd actually read the code.. that will > simply not work. It cannot find all bugs - such as the CPU hotplug race that is still present in the code. Still there's no excuse to go outside regular spinlock debug primitives via arch_spinlock_t. If lockdep blows up in br_write_lock() due to holding up to 4096 individual locks then we should add the exceptions to this particular write lock when the CPU count is too high - but: - do not disable the checking on saner configs - not disable all the *OTHER* lock debugging checks such as: - spin-lockup detection [this works even without ->held_locks] - allocate/free failure detection: The percpu code could be extended to run the equivalent of debug_check_no_locks_freed() over the percpu area that is going away, to make sure no held locks are freed. etc. Thanks, Ingo ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-27 23:53 ` Andrew Morton (?) (?) @ 2012-02-28 11:24 ` Andi Kleen 2012-03-05 7:02 ` Rusty Russell ` (3 more replies) -1 siblings, 4 replies; 45+ messages in thread From: Andi Kleen @ 2012-02-28 11:24 UTC (permalink / raw) To: Andrew Morton Cc: Rusty Russell, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On Mon, Feb 27, 2012 at 03:53:38PM -0800, Andrew Morton wrote: > On Tue, 28 Feb 2012 09:52:30 +1030 > Rusty Russell <rusty@rustcorp.com.au> wrote: > > > Use a cpumask_var_t instead of cpumask_t. We're doing plenty of > > allocations here anyway, so it's not really an issue, and it sets a > > good example. > > > > (cpumask_t is obsolescent, as are the cpus_* functions). > > Congratulations to yourself and Andi: Well the only way to avoid that problem is to merge it ASAP. The patch -- like most code movement patches -- is totally unsuitable for keeping around for months in trees. Usually I found the best way to do merges with code movement patches is to edit the patches itself if possible. Maybe some day we'll get patch like tools that can deal with this stuff better. Rusty: you should probably have cocci rules for this stuff, not manual patches. -Andi ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH] cpumask: fix lg_lock/br_lock. 2012-02-28 11:24 ` Andi Kleen @ 2012-03-05 7:02 ` Rusty Russell 2012-03-05 7:03 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell ` (2 subsequent siblings) 3 siblings, 0 replies; 45+ messages in thread From: Rusty Russell @ 2012-03-05 7:02 UTC (permalink / raw) To: Andi Kleen, Andrew Morton Cc: Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On Tue, 28 Feb 2012 03:24:22 -0800, Andi Kleen <ak@linux.intel.com> wrote: > Rusty: you should probably have cocci rules for this stuff, not manual > patches. Yes, though hopefully this is the last. I've updated your patch based on Ingo's peevish feedback and a patch to rip out the online stuff; posting now... Thanks, Rusty. -- Please can you help my fiance Alex? http://baldalex.org ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 1/3] lglock: remove online variants of lock 2012-02-28 11:24 ` Andi Kleen 2012-03-05 7:02 ` Rusty Russell @ 2012-03-05 7:03 ` Rusty Russell 2012-04-20 11:12 ` Nick Piggin 2012-03-05 7:04 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell 2012-03-05 7:05 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 3 siblings, 1 reply; 45+ messages in thread From: Rusty Russell @ 2012-03-05 7:03 UTC (permalink / raw) To: Andi Kleen, Andrew Morton Cc: Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- include/linux/lglock.h | 58 +------------------------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/include/linux/lglock.h b/include/linux/lglock.h --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - 
extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); 
\ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 1/3] lglock: remove online variants of lock 2012-03-05 7:03 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell @ 2012-04-20 11:12 ` Nick Piggin 0 siblings, 0 replies; 45+ messages in thread From: Nick Piggin @ 2012-04-20 11:12 UTC (permalink / raw) To: Rusty Russell Cc: Andi Kleen, Andrew Morton, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On Mon, Mar 05, 2012 at 05:33:41PM +1030, Rusty Russell wrote: > Optimizing the slow paths adds a lot of complexity. If you need to > grab every lock often, you have other problems. > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Someone might cry about online vs possible discrepancy, but I don't really mind. Acked-by: Nick Piggin <npiggin@kernel.dk> ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 2/3] brlocks/lglocks: API cleanups 2012-02-28 11:24 ` Andi Kleen 2012-03-05 7:02 ` Rusty Russell 2012-03-05 7:03 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell @ 2012-03-05 7:04 ` Rusty Russell 2012-03-05 7:05 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 3 siblings, 0 replies; 45+ messages in thread From: Rusty Russell @ 2012-03-05 7:04 UTC (permalink / raw) To: Andi Kleen, Andrew Morton Cc: Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel From: Andi Kleen <ak@linux.intel.com> lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. In preparation, this patch changes the API to look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. 
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- fs/dcache.c | 4 - fs/file_table.c | 16 ++--- fs/namei.c | 24 ++++---- fs/namespace.c | 140 ++++++++++++++++++++++++++-------------------------- fs/pnode.c | 4 - fs/proc_namespace.c | 4 - 6 files changed, 96 insertions(+), 96 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2489,7 +2489,7 @@ static int prepend_path(const struct pat bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2520,7 +2520,7 @@ static int prepend_path(const struct pat error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff --git a/fs/file_table.c b/fs/file_table.c --- a/fs/file_table.c +++ b/fs/file_table.c @@ -422,9 +422,9 @@ static inline void __file_sb_list_add(st */ void file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -437,9 +437,9 @@ void file_sb_list_add(struct file *file, void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -486,7 +486,7 @@ void mark_files_ro(struct super_block *s struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); 
do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -503,12 +503,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -526,6 +526,6 @@ void __init files_init(unsigned long mem n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/namei.c b/fs/namei.c --- a/fs/namei.c +++ b/fs/namei.c @@ -462,7 +462,7 @@ static int unlazy_walk(struct nameidata mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -520,14 +520,14 @@ static int complete_walk(struct nameidat if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -694,15 +694,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); 
dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -960,7 +960,7 @@ failed: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1256,7 +1256,7 @@ static void terminate_walk(struct nameid if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1515,7 +1515,7 @@ static int path_init(int dfd, const char nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1528,7 +1528,7 @@ static int path_init(int dfd, const char if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1541,7 +1541,7 @@ static int path_init(int dfd, const char struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1577,7 +1577,7 @@ static int path_init(int dfd, const char if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff --git a/fs/namespace.c b/fs/namespace.c --- a/fs/namespace.c +++ b/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct moun { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct moun */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct s if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct s if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type * mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mo mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = 
mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct moun { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock 
needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *he struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mn q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmou { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct m if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int 
attach_recursive_mnt(struct m list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *p goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - 
br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_ return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_ touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2249,10 +2250,9 @@ static struct mnt_namespace *dup_mnt_ns( kfree(new_ns); return ERR_PTR(-ENOMEM); } - new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2416,9 @@ bool is_path_reachable(struct mount *mnt int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2505,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, 
&parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2513,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2576,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2596,9 @@ void put_mnt_ns(struct mnt_namespace *ns if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c --- a/fs/pnode.c +++ b/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } -- Please can you help my fiance Alex? http://baldalex.org ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 3/3] brlocks/lglocks: turn into functions 2012-02-28 11:24 ` Andi Kleen ` (2 preceding siblings ...) 2012-03-05 7:04 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell @ 2012-03-05 7:05 ` Rusty Russell 2012-04-20 11:21 ` Nick Piggin 3 siblings, 1 reply; 45+ messages in thread From: Rusty Russell @ 2012-03-05 7:05 UTC (permalink / raw) To: Andi Kleen, Andrew Morton Cc: Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel From: Andi Kleen <ak@linux.intel.com> lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (split and completed) --- fs/file_table.c | 1 fs/internal.h | 2 include/linux/lglock.h | 125 ++++++++++--------------------------------------- kernel/Makefile | 2 kernel/lglock.c | 89 ++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 102 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(s extern void __init 
mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include <linux/lockdep.h> #include <linux/percpu.h> #include <linux/cpu.h> +#include <linux/notifier.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key 
lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, 
_RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include <linux/module.h> +#include <linux/lglock.h> +#include <linux/cpu.h> +#include <linux/string.h> + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void 
lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 3/3] brlocks/lglocks: turn into functions 2012-03-05 7:05 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell @ 2012-04-20 11:21 ` Nick Piggin 2012-05-07 3:39 ` Rusty Russell 0 siblings, 1 reply; 45+ messages in thread From: Nick Piggin @ 2012-04-20 11:21 UTC (permalink / raw) To: Rusty Russell Cc: Andi Kleen, Andrew Morton, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel This still not merged? On Mon, Mar 05, 2012 at 05:35:28PM +1030, Rusty Russell wrote: > From: Andi Kleen <ak@linux.intel.com> > > lglocks and brlocks are currently generated with some complicated macros > in lglock.h. But there's no reason to not just use common utility > functions and put all the data into a common data structure. There is a reason, which is performance. Extra function call, but also IIRC the percpu accessor was not so fast doing it this way. Maybe that's improved... So what's the performance difference? > > Since there are at least two users it makes sense to share this code in a > library. This is also easier maintainable than a macro forest. > > This will also make it later possible to dynamically allocate lglocks and > also use them in modules (this would both still need some additional, but > now straightforward, code) Yes, but let's not do either of those things :) I was slightly crazy when committing that patch to the kernel, I'll admit. So if performance isn't significantly affected, then definitely. If it is... well, it's much easier to gain 1% performance by maintaining 100 self contained lines of hilarious code like this than to actually use your brain to improve somewhere else! Thanks, Nick ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 3/3] brlocks/lglocks: turn into functions 2012-04-20 11:21 ` Nick Piggin @ 2012-05-07 3:39 ` Rusty Russell 2012-05-07 5:46 ` Al Viro 2012-05-09 7:35 ` Nick Piggin 0 siblings, 2 replies; 45+ messages in thread From: Rusty Russell @ 2012-05-07 3:39 UTC (permalink / raw) To: Nick Piggin Cc: Andi Kleen, Andrew Morton, Nick Piggin, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On Fri, 20 Apr 2012 21:21:49 +1000, Nick Piggin <npiggin@kernel.dk> wrote: > This still not merged? No, I've been away. I've put it in -next for tomorrow, though I'm not sure what the best way to get it to Linus next merge window. > There is a reason, which is performance. Extra function call, but also > IIRC the percpu accessor was not so fast doing it this way. Maybe > that's improved... > > So what's the performance difference? What benchmarks you usually run? Feel free to try it out and report back; I only have small hardware here. > > > > Since there are at least two users it makes sense to share this code in a > > library. This is also easier maintainable than a macro forest. > > > > This will also make it later possible to dynamically allocate lglocks and > > also use them in modules (this would both still need some additional, but > > now straightforward, code) > > Yes, but let's not do either of those things :) > > I was slightly crazy when committing that patch to the kernel, I'll > admit. So if performance isn't significantly affected, then definitely. > If it is... well, it's much easier to gain 1% performance by maintaining > 100 self contained lines of hilarious code like this than to actually > use your brain to improve somewhere else! Cheers, Rusty. ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 3/3] brlocks/lglocks: turn into functions 2012-05-07 3:39 ` Rusty Russell @ 2012-05-07 5:46 ` Al Viro 2012-05-08 3:59 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell ` (2 more replies) 2012-05-09 7:35 ` Nick Piggin 1 sibling, 3 replies; 45+ messages in thread From: Al Viro @ 2012-05-07 5:46 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel On Mon, May 07, 2012 at 01:09:04PM +0930, Rusty Russell wrote: > On Fri, 20 Apr 2012 21:21:49 +1000, Nick Piggin <npiggin@kernel.dk> wrote: > > This still not merged? > > No, I've been away. I've put it in -next for tomorrow, though I'm not > sure what the best way to get it to Linus next merge window. Resend and I'll pick it. ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 1/3] lglock: remove online variants of lock 2012-05-07 5:46 ` Al Viro @ 2012-05-08 3:59 ` Rusty Russell 2012-05-08 4:50 ` Al Viro 2012-05-08 4:02 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell 2012-05-08 4:02 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 2 siblings, 1 reply; 45+ messages in thread From: Rusty Russell @ 2012-05-08 3:59 UTC (permalink / raw) To: Al Viro Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> Acked-by: Nick Piggin <npiggin@kernel.dk> --- include/linux/lglock.h | 58 +------------------------------------------------ 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/include/linux/lglock.h b/include/linux/lglock.h --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern 
void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } 
\ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 1/3] lglock: remove online variants of lock 2012-05-08 3:59 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell @ 2012-05-08 4:50 ` Al Viro 2012-05-08 6:12 ` Rusty Russell 0 siblings, 1 reply; 45+ messages in thread From: Al Viro @ 2012-05-08 4:50 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel On Tue, May 08, 2012 at 01:29:45PM +0930, Rusty Russell wrote: > Optimizing the slow paths adds a lot of complexity. If you need to > grab every lock often, you have other problems. Applied, but I'm not too happy about the situation with vfsmount_lock ;-/ On kernels built for a lot of possible CPUs the loss of ..._online() versions will get painful. OTOH, we can always put the map + single spinlock + single notifier into lglock.c and reproduce the old logic. I'll do a patch along those lines and put it on a separate branch; then we'll be able to test and compare. Contention from cpu map spinlock becoming shared between different lglocks (all two of them) is not an issue - we never use the files_lock one anyway (there we can't use _online variants). ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 1/3] lglock: remove online variants of lock 2012-05-08 4:50 ` Al Viro @ 2012-05-08 6:12 ` Rusty Russell 0 siblings, 0 replies; 45+ messages in thread From: Rusty Russell @ 2012-05-08 6:12 UTC (permalink / raw) To: Al Viro Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel On Tue, 8 May 2012 05:50:44 +0100, Al Viro <viro@ZenIV.linux.org.uk> wrote: > On Tue, May 08, 2012 at 01:29:45PM +0930, Rusty Russell wrote: > > Optimizing the slow paths adds a lot of complexity. If you need to > > grab every lock often, you have other problems. > > Applied, but I'm not too happy about the situation with vfsmount_lock ;-/ > On kernels built for a lot of possible CPUs the loss of ..._online() > versions will get painful. My original motivation was to get rid of that cpumask_t (and replace it with a cpumask_var_t). A simple enough patch, but I couldn't bring myself to leave that complex logic in place without clear justification. > OTOH, we can always put the map + single > spinlock + single notifier into lglock.c and reproduce the old logics. > I'll do a patch along those lines and put it on a separate branch; > then we'll be able to test and compare. I'll be interested in the results: virtual systems are classic for wanting large # of CPUs which aren't actually online, so we might actually care. I'd also like to get rid of the cpu_possible_map altogether, and just have NR_CPUS/nr_cpu_ids, since last I checked no arch really wants sparse numbers. Cheers, Rusty. ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 2/3] brlocks/lglocks: API cleanups 2012-05-07 5:46 ` Al Viro 2012-05-08 3:59 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell @ 2012-05-08 4:02 ` Rusty Russell 2012-05-08 4:02 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 2 siblings, 0 replies; 45+ messages in thread From: Rusty Russell @ 2012-05-08 4:02 UTC (permalink / raw) To: Al Viro Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel From: Andi Kleen <ak@linux.intel.com> lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. In preparation, this patch changes the API to look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. 
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- fs/dcache.c | 4 - fs/file_table.c | 16 ++--- fs/namei.c | 24 ++++---- fs/namespace.c | 140 ++++++++++++++++++++++++++-------------------------- fs/pnode.c | 4 - fs/proc_namespace.c | 4 - 6 files changed, 96 insertions(+), 96 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2489,7 +2489,7 @@ static int prepend_path(const struct pat bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2520,7 +2520,7 @@ static int prepend_path(const struct pat error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff --git a/fs/file_table.c b/fs/file_table.c --- a/fs/file_table.c +++ b/fs/file_table.c @@ -422,9 +422,9 @@ static inline void __file_sb_list_add(st */ void file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -437,9 +437,9 @@ void file_sb_list_add(struct file *file, void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -486,7 +486,7 @@ void mark_files_ro(struct super_block *s struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); 
do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -503,12 +503,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -526,6 +526,6 @@ void __init files_init(unsigned long mem n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/namei.c b/fs/namei.c --- a/fs/namei.c +++ b/fs/namei.c @@ -462,7 +462,7 @@ static int unlazy_walk(struct nameidata mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -520,14 +520,14 @@ static int complete_walk(struct nameidat if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -694,15 +694,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); 
dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -960,7 +960,7 @@ failed: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1256,7 +1256,7 @@ static void terminate_walk(struct nameid if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1515,7 +1515,7 @@ static int path_init(int dfd, const char nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1528,7 +1528,7 @@ static int path_init(int dfd, const char if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1541,7 +1541,7 @@ static int path_init(int dfd, const char struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1577,7 +1577,7 @@ static int path_init(int dfd, const char if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff --git a/fs/namespace.c b/fs/namespace.c --- a/fs/namespace.c +++ b/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct moun { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. 
This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct moun */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct s if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct s if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type * mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mo mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = 
mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct moun { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock 
needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *he struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mn q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmou { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct m if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int 
attach_recursive_mnt(struct m list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *p goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - 
br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_ return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_ touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2249,10 +2250,9 @@ static struct mnt_namespace *dup_mnt_ns( kfree(new_ns); return ERR_PTR(-ENOMEM); } - new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2416,9 @@ bool is_path_reachable(struct mount *mnt int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2505,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* make sure we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, 
&parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2513,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2576,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2596,9 @@ void put_mnt_ns(struct mnt_namespace *ns if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c --- a/fs/pnode.c +++ b/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } ^ permalink raw reply [flat|nested] 45+ messages in thread
* [PATCH 3/3] brlocks/lglocks: turn into functions 2012-05-07 5:46 ` Al Viro 2012-05-08 3:59 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell 2012-05-08 4:02 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell @ 2012-05-08 4:02 ` Rusty Russell 2 siblings, 0 replies; 45+ messages in thread From: Rusty Russell @ 2012-05-08 4:02 UTC (permalink / raw) To: Al Viro Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Srivatsa S. Bhat, linux-fsdevel From: Andi Kleen <ak@linux.intel.com> lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> --- fs/file_table.c | 1 fs/internal.h | 2 include/linux/lglock.h | 125 ++++++++++--------------------------------------- kernel/Makefile | 2 kernel/lglock.c | 89 ++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+), 102 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(s extern void __init 
mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include <linux/lockdep.h> #include <linux/percpu.h> #include <linux/cpu.h> +#include <linux/notifier.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name) name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key 
lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, 
_RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include <linux/module.h> +#include <linux/lglock.h> +#include <linux/cpu.h> +#include <linux/string.h> + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void 
lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 3/3] brlocks/lglocks: turn into functions 2012-05-07 3:39 ` Rusty Russell @ 2012-05-09 7:35 ` Nick Piggin 2012-05-09 7:35 ` Nick Piggin 1 sibling, 0 replies; 45+ messages in thread From: Nick Piggin @ 2012-05-09 7:35 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On 7 May 2012 13:39, Rusty Russell <rusty@rustcorp.com.au> wrote: > On Fri, 20 Apr 2012 21:21:49 +1000, Nick Piggin <npiggin@kernel.dk> wrote: >> This still not merged? > > No, I've been away. I've put it in -next for tomorrow, though I'm not > sure what the best way to get it to Linus next merge window. > >> There is a reason, which is performance. Extra function call, but also >> IIRC the percpu accessor was not so fast doing it this way. Maybe >> that's improved... >> >> So what's the performance difference? > > What benchmarks you usually run? Feel free to try it out and report > back; I only have small hardware here. Nothing big. The actual scalability should be unchanged, because you're not going to be dirtying any shared cachelines. It's the use of dynamic percpu allocator and out of line calls etc which will slow down straight line performance. How many microseconds does it take to stat("./dir1/dir2/myfile"), for example? `git diff` of a large tree, for something more realistic but still going to exercise that path. ^ permalink raw reply [flat|nested] 45+ messages in thread
* Re: [PATCH 3/3] brlocks/lglocks: turn into functions @ 2012-05-09 7:35 ` Nick Piggin 0 siblings, 0 replies; 45+ messages in thread From: Nick Piggin @ 2012-05-09 7:35 UTC (permalink / raw) To: Rusty Russell Cc: Nick Piggin, Andi Kleen, Andrew Morton, linux-kernel, Alexander Viro, Srivatsa S. Bhat, linux-fsdevel On 7 May 2012 13:39, Rusty Russell <rusty@rustcorp.com.au> wrote: > On Fri, 20 Apr 2012 21:21:49 +1000, Nick Piggin <npiggin@kernel.dk> wrote: >> This still not merged? > > No, I've been away. I've put it in -next for tomorrow, though I'm not > sure what the best way to get it to Linus next merge window. > >> There is a reason, which is performance. Extra function call, but also >> IIRC the percpu accessor was not so fast doing it this way. Maybe >> that's improved... >> >> So what's the performance difference? > > What benchmarks you usually run? Feel free to try it out and report > back; I only have small hardware here. Nothing big. The actual scalability should be unchanged, because you're not going to be dirtying any shared cachelines. It's the use of dynamic percpu allocator and out of line calls etc which will slow down straight line performance. How many microseconds does it take to stat("./dir1/dir2/myfile"), for example? `git diff` of a large tree, for something more realistic but still going to exercise that path. -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 45+ messages in thread
end of thread, other threads:[~2012-05-09 7:35 UTC | newest] Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2012-02-27 23:22 [PATCH] cpumask: fix lg_lock/br_lock Rusty Russell 2012-02-27 23:53 ` Andrew Morton 2012-02-27 23:53 ` Andrew Morton 2012-02-28 8:43 ` Ingo Molnar 2012-02-28 11:25 ` Andi Kleen 2012-02-28 12:51 ` Ingo Molnar 2012-02-28 21:27 ` Andrew Morton 2012-02-29 5:44 ` Srivatsa S. Bhat 2012-02-29 9:17 ` Ingo Molnar 2012-02-29 11:12 ` Srivatsa S. Bhat 2012-03-01 7:38 ` Ingo Molnar 2012-03-01 9:15 ` Srivatsa S. Bhat 2012-03-01 9:45 ` Ingo Molnar 2012-03-01 9:56 ` Srivatsa S. Bhat 2012-03-01 8:12 ` Srivatsa S. Bhat 2012-03-01 8:24 ` Srivatsa S. Bhat 2012-03-01 8:12 ` Srivatsa S. Bhat 2012-03-01 8:15 ` [PATCH 1/3] CPU hotplug: Fix issues with callback registration Srivatsa S. Bhat 2012-03-01 8:27 ` Srivatsa S. Bhat 2012-03-01 8:15 ` Srivatsa S. Bhat 2012-03-01 8:16 ` [PATCH 2/3] CPU hotplug, arch/powerpc: Fix CPU hotplug " Srivatsa S. Bhat 2012-03-01 8:28 ` Srivatsa S. Bhat 2012-03-01 8:16 ` Srivatsa S. Bhat 2012-03-01 8:18 ` [PATCH 3/3] CPU hotplug, arch/sparc: " Srivatsa S. Bhat 2012-03-01 8:30 ` Srivatsa S. Bhat 2012-03-01 8:18 ` Srivatsa S. 
Bhat 2012-02-29 8:29 ` [PATCH] cpumask: fix lg_lock/br_lock Ingo Molnar 2012-02-29 8:58 ` Peter Zijlstra 2012-02-29 9:32 ` Ingo Molnar 2012-02-28 11:24 ` Andi Kleen 2012-03-05 7:02 ` Rusty Russell 2012-03-05 7:03 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell 2012-04-20 11:12 ` Nick Piggin 2012-03-05 7:04 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell 2012-03-05 7:05 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 2012-04-20 11:21 ` Nick Piggin 2012-05-07 3:39 ` Rusty Russell 2012-05-07 5:46 ` Al Viro 2012-05-08 3:59 ` [PATCH 1/3] lglock: remove online variants of lock Rusty Russell 2012-05-08 4:50 ` Al Viro 2012-05-08 6:12 ` Rusty Russell 2012-05-08 4:02 ` [PATCH 2/3] brlocks/lglocks: API cleanups Rusty Russell 2012-05-08 4:02 ` [PATCH 3/3] brlocks/lglocks: turn into functions Rusty Russell 2012-05-09 7:35 ` Nick Piggin 2012-05-09 7:35 ` Nick Piggin
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.