From: david singleton <dsingleton@mvista.com>
To: Andrew Morton <akpm@osdl.org>
Cc: mingo@elte.hu, linux-kernel@vger.kernel.org, drepper@gmail.com
Subject: [robust-futex-5] futex: robust futex support
Date: Thu, 19 Jan 2006 16:47:03 -0800 [thread overview]
Message-ID: <46429B1C-894E-11DA-ABB2-000A959BB91E@mvista.com> (raw)
In-Reply-To: <20060118212256.1553b0ec.akpm@osdl.org>
[-- Attachment #1: Type: text/plain, Size: 586 bytes --]
Andrew,
I have incorporated the changes you suggested, with the exception of
the check
for a NULL mm in exit_futex. Apparently there is a task that exits in
the boot path
that has a null mm and causes the kernel to crash on boot without this
check.
fs/inode.c | 2
include/linux/fs.h | 4
include/linux/futex.h | 33 ++++
init/Kconfig | 9 +
kernel/exit.c | 2
kernel/futex.c | 398
++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 448 insertions(+)
David
[-- Attachment #2: robust-futex-5 --]
[-- Type: application/octet-stream, Size: 16275 bytes --]
Signed-off-by: David Singleton <dsingleton@mvista.com>
Incorporated changes suggested by Andrew Morton
fs/inode.c | 2
include/linux/fs.h | 4
include/linux/futex.h | 33 ++++
init/Kconfig | 9 +
kernel/exit.c | 2
kernel/futex.c | 398 ++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 448 insertions(+)
Index: linux-2.6.15/include/linux/fs.h
===================================================================
--- linux-2.6.15.orig/include/linux/fs.h
+++ linux-2.6.15/include/linux/fs.h
@@ -9,6 +9,7 @@
#include <linux/config.h>
#include <linux/limits.h>
#include <linux/ioctl.h>
+#include <linux/futex.h>
/*
* It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -383,6 +384,9 @@ struct address_space {
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+#ifdef CONFIG_ROBUST_FUTEX
+ struct futex_head *robust_head; /* list of robust futexes */
+#endif
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
Index: linux-2.6.15/include/linux/futex.h
===================================================================
--- linux-2.6.15.orig/include/linux/futex.h
+++ linux-2.6.15/include/linux/futex.h
@@ -10,6 +10,38 @@
#define FUTEX_REQUEUE 3
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
+#define FUTEX_REGISTER 6
+#define FUTEX_DEREGISTER 7
+#define FUTEX_RECOVER 8
+
+#define FUTEX_WAITERS 0x80000000
+#define FUTEX_OWNER_DIED 0x40000000
+#define FUTEX_NOT_RECOVERABLE 0x20000000
+#define FUTEX_FLAGS (FUTEX_WAITERS | FUTEX_OWNER_DIED | FUTEX_NOT_RECOVERABLE)
+#define FUTEX_PID ~(FUTEX_FLAGS)
+
+#define FUTEX_ATTR_SHARED 0x10000000
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_ROBUST_FUTEX
+
+struct futex_head {
+ struct list_head robust_list;
+ struct mutex robust_mutex;
+};
+
+struct inode;
+struct task_struct;
+extern void futex_free_robust_list(struct inode *inode);
+extern void exit_futex(struct task_struct *tsk);
+#else
+# define futex_free_robust_list(a) do { } while (0)
+# define exit_futex(b) do { } while (0)
+#define futex_init_inode(a) do { } while (0)
+#endif
long do_futex(unsigned long uaddr, int op, int val,
unsigned long timeout, unsigned long uaddr2, int val2,
@@ -41,3 +73,4 @@ long do_futex(unsigned long uaddr, int o
| ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
#endif
+#endif
Index: linux-2.6.15/kernel/exit.c
===================================================================
--- linux-2.6.15.orig/kernel/exit.c
+++ linux-2.6.15/kernel/exit.c
@@ -31,6 +31,7 @@
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/mutex.h>
+#include <linux/futex.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -847,6 +848,7 @@ fastcall NORET_TYPE void do_exit(long co
exit_itimers(tsk->signal);
acct_process(code);
}
+ exit_futex(tsk);
exit_mm(tsk);
exit_sem(tsk);
Index: linux-2.6.15/kernel/futex.c
===================================================================
--- linux-2.6.15.orig/kernel/futex.c
+++ linux-2.6.15/kernel/futex.c
@@ -8,6 +8,9 @@
* Removed page pinning, fix privately mapped COW pages and other cleanups
* (C) Copyright 2003, 2004 Jamie Lokier
*
+ * Robust futexes added by Todd Kneisel
+ * (C) Copyright 2005, Bull HN.
+ *
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
@@ -40,7 +43,9 @@
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
+#include <linux/mutex.h>
#include <asm/futex.h>
+#include <asm/uaccess.h>
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
@@ -829,6 +834,382 @@ error:
goto out;
}
+#ifdef CONFIG_ROBUST_FUTEX
+/*
+ * Robust futexes provide a locking mechanism that can be shared between
+ * user mode processes. The major difference between robust futexes and
+ * regular futexes is that when the owner of a robust futex dies, the
+ * next task waiting on the futex will be awakened, will get ownership
+ * of the futex lock, and will receive the error status EOWNERDEAD.
+ *
+ * A robust futex is a 32 bit integer stored in user mode shared memory.
+ * Bit 31 indicates that there are tasks waiting on the futex.
+ * Bit 30 indicates that the task that owned the futex has died.
+ * Bit 29 indicates that the futex is not recoverable and cannot be used.
+ * Bits 0-28 are the pid of the task that owns the futex lock, or zero if
+ * the futex is not locked.
+ */
+
+static kmem_cache_t *robust_futex_cachep;
+static kmem_cache_t *file_futex_cachep;
+/*
+ * Used to track registered robust futexes. Attached to linked list in inodes.
+ */
+struct futex_robust {
+ struct list_head list;
+ union futex_key key;
+};
+
+/**
+ * futex_free_robust_list - release the list of registered futexes.
+ * @inode: inode that may be a memory mapped file
+ *
+ * Called from dput() when a dentry reference count reaches zero.
+ * If the dentry is associated with a memory mapped file, then
+ * release the list of registered robust futexes that are contained
+ * in that mapping.
+ */
+void futex_free_robust_list(struct inode *inode)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct list_head *head;
+ struct futex_robust *this, *next;
+ struct futex_head *futex_head = NULL;
+
+ futex_head = mapping->robust_head;
+
+ if (futex_head == NULL)
+ return;
+
+ if (list_empty(&futex_head->robust_list))
+ return;
+
+ mutex_lock(&futex_head->robust_mutex);
+
+ head = &futex_head->robust_list;
+
+ list_for_each_entry_safe(this, next, head, list) {
+ kmem_cache_free(robust_futex_cachep, this);
+ }
+
+ mutex_unlock(&futex_head->robust_mutex);
+
+ kmem_cache_free(file_futex_cachep, futex_head);
+ mapping->robust_head = NULL;
+
+ return;
+}
+
+/**
+ * get_private_uaddr - convert a private futex_key to a user addr
+ * @key: the futex_key that identifies a futex.
+ *
+ * Private futex_keys identify a futex that is in non-shared memory.
+ * Robust futexes should never result in private futex_keys, but keep
+ * this code for completeness.
+ * Returns zero if futex is not contained in current task's mm
+ */
+static unsigned long get_private_uaddr(union futex_key *key)
+{
+ unsigned long uaddr = 0;
+
+ if (key->private.mm == current->mm)
+ uaddr = key->private.uaddr;
+ return uaddr;
+}
+
+/**
+ * get_shared_uaddr - convert a shared futex_key to a user addr.
+ * @key: a futex_key that identifies a futex.
+ * @vma: a vma that may contain the futex
+ *
+ * Shared futex_keys identify a futex that is contained in a vma,
+ * and so may be shared.
+ * Returns zero if futex is not contained in @vma
+ */
+static unsigned long get_shared_uaddr(union futex_key *key,
+ struct vm_area_struct *vma)
+{
+ unsigned long uaddr = 0;
+ unsigned long tmpaddr;
+ struct address_space *mapping;
+
+ mapping = vma->vm_file->f_mapping;
+ if (key->shared.inode == mapping->host) {
+ tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
+ + (key->shared.offset & ~0x1)
+ + vma->vm_start;
+ if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
+ uaddr = tmpaddr;
+ }
+
+ return uaddr;
+}
+
+/**
+ * get_futex_uaddr - convert a futex_key to a user addr.
+ * @key: futex_key that identifies a futex
+ * @vma: vma that may contain the futex
+ *
+ * Converts both shared and private futex_keys.
+ * Returns zero if futex is not contained in @vma or in the current
+ * task's mm.
+ */
+static unsigned long get_futex_uaddr(union futex_key *key,
+ struct vm_area_struct *vma)
+{
+ unsigned long uaddr;
+
+ if ((key->both.offset & 0x1) == 0)
+ uaddr = get_private_uaddr(key);
+ else
+ uaddr = get_shared_uaddr(key,vma);
+
+ return uaddr;
+}
+
+/**
+ * release_owned_futex - find futexes owned by the current task
+ * @tsk: task that is exiting
+ * @vma: vma containing robust futexes
+ * @head: list head for list of robust futexes
+ * @mutex: mutex that protects the list
+ *
+ * Walk the list of registered robust futexes for this @vma,
+ * setting the %FUTEX_OWNER_DIED flag on those futexes owned
+ * by the current, exiting task.
+ */
+static void
+release_owned_futex(struct task_struct *tsk, struct vm_area_struct *vma,
+ struct futex_head *robust)
+{
+ struct futex_robust *this, *next;
+ struct mutex *mutex = &robust->robust_mutex;
+ struct list_head *head = &robust->robust_list;
+ unsigned long uaddr;
+ int value;
+
+ mutex_lock(mutex);
+
+ list_for_each_entry_safe(this, next, head, list) {
+
+ uaddr = get_futex_uaddr(&this->key, vma);
+ if (uaddr == 0)
+ continue;
+
+ mutex_unlock(mutex);
+ up_read(&tsk->mm->mmap_sem);
+ get_user(value, (int __user *)uaddr);
+ if ((value & FUTEX_PID) == tsk->pid) {
+ value |= FUTEX_OWNER_DIED;
+ put_user(value, (int __user *)uaddr);
+ futex_wake(uaddr, 1);
+ }
+ down_read(&tsk->mm->mmap_sem);
+ mutex_lock(mutex);
+ }
+
+ mutex_unlock(mutex);
+}
+
+/**
+ * exit_futex - futex processing when a task exits.
+ *
+ * Called from do_exit() when a task exits. Mark all robust futexes
+ * that are owned by the current terminating task as %FUTEX_OWNER_DIED.
+ */
+
+void exit_futex(struct task_struct *tsk)
+{
+ struct mm_struct *mm = tsk->mm;
+ struct list_head *robust;
+ struct futex_head *head;
+ struct vm_area_struct *vma;
+ struct address_space *addr;
+
+ if (mm == NULL)
+ return;
+
+ down_read(&mm->mmap_sem);
+
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ if (vma->vm_file == NULL)
+ continue;
+
+ addr = vma->vm_file->f_mapping;
+ if (addr == NULL)
+ continue;
+
+ if (addr->robust_head == NULL)
+ continue;
+
+ head = addr->robust_head;
+ robust = &head->robust_list;
+
+ if (list_empty(robust))
+ continue;
+
+ release_owned_futex(tsk, vma, head);
+ }
+
+ up_read(&mm->mmap_sem);
+}
+
+static void init_robust_list(struct address_space *f, struct futex_head *head)
+{
+ f->robust_head = head;
+ INIT_LIST_HEAD(&f->robust_head->robust_list);
+ mutex_init(&f->robust_head->robust_mutex);
+}
+
+/**
+ * futex_register - Record the existence of a robust futex in a vma.
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall) when a robust
+ * futex is created. Looks up the vma that contains the futex and
+ * adds an entry to the list of all robust futexes in the vma.
+ */
+static int futex_register(unsigned long uaddr, unsigned int attr)
+{
+ int ret = 0;
+ struct futex_robust *robust;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct address_space *addr = NULL;
+ struct list_head *head = NULL;
+ struct mutex *mutex = NULL;
+ struct futex_head *file_futex = NULL;
+
+ if ((attr & FUTEX_ATTR_SHARED) == 0)
+ return -EADDRNOTAVAIL;
+
+ down_read(&mm->mmap_sem);
+
+ vma = find_extend_vma(mm, uaddr);
+
+ if (vma->vm_file) {
+ addr = vma->vm_file->f_mapping;
+ if (addr == NULL) {
+ ret = -EADDRNOTAVAIL;
+ goto out;
+ }
+ robust = kmem_cache_alloc(robust_futex_cachep, GFP_KERNEL);
+ if (!robust) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (addr->robust_head == NULL) {
+ file_futex = kmem_cache_alloc(file_futex_cachep, GFP_KERNEL);
+ if (!file_futex) {
+ kmem_cache_free(robust_futex_cachep, robust);
+ ret = -ENOMEM;
+ goto out;
+ }
+ init_robust_list(addr, file_futex);
+ }
+ head = &addr->robust_head->robust_list;
+ mutex = &addr->robust_head->robust_mutex;
+ } else {
+ ret = -EADDRNOTAVAIL;
+ goto out;
+ }
+
+ mutex_lock(mutex);
+ list_add_tail(&robust->list, head);
+ mutex_unlock(mutex);
+
+out:
+ up_read(&mm->mmap_sem);
+
+ return ret;
+}
+
+/**
+ * futex_deregister - Delete robust futex registration from a vma
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall) when a robust
+ * futex is destroyed. Looks up the vma that contains the futex and
+ * removes the futex entry from the list of all robust futexes in
+ * the vma.
+ */
+static int futex_deregister(unsigned long uaddr)
+{
+ union futex_key key;
+ struct mm_struct *mm = current->mm;
+ struct list_head *head = NULL;
+ struct mutex *mutex = NULL;
+ struct vm_area_struct *vma;
+ struct address_space *addr;
+ struct futex_robust *this, *next;
+ int ret;
+
+ down_read(&mm->mmap_sem);
+
+ ret = get_futex_key(uaddr, &key);
+ if (unlikely(ret != 0))
+ goto out;
+ vma = find_extend_vma(mm, uaddr);
+ if (vma->vm_file) {
+ addr = vma->vm_file->f_mapping;
+ if (addr && addr->robust_head) {
+ mutex = &addr->robust_head->robust_mutex;
+ head = &addr->robust_head->robust_list;
+ }
+ }
+ if (head == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(mutex);
+
+ list_for_each_entry_safe(this, next, head, list) {
+ if (match_futex(&this->key, &key)) {
+ futex_wake(uaddr, 1);
+ list_del(&this->list);
+ kmem_cache_free(robust_futex_cachep, this);
+ break;
+ }
+ }
+
+ mutex_unlock(mutex);
+out:
+ up_read(&mm->mmap_sem);
+ return ret;
+}
+
+/**
+ * futex_recover - Recover a futex after its owner died
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall).
+ * When a task dies while owning a robust futex, the futex is
+ * marked with %FUTEX_OWNER_DIED and ownership is transferred
+ * to the next waiting task. That task can choose to restore
+ * the futex to a useful state by calling this function.
+ */
+static int futex_recover(unsigned long uaddr)
+{
+ int ret = 0;
+ int value = 0;
+ union futex_key key;
+
+ down_read(&current->mm->mmap_sem);
+ ret = get_futex_key(uaddr, &key);
+ up_read(&current->mm->mmap_sem);
+ if (ret != 0)
+ return ret;
+
+ if (get_user(value, (int __user *)uaddr))
+ return ret;
+
+ value &= ~FUTEX_OWNER_DIED;
+ return put_user(value, (int __user *)uaddr);
+}
+#endif
+
long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
unsigned long uaddr2, int val2, int val3)
{
@@ -854,6 +1235,17 @@ long do_futex(unsigned long uaddr, int o
case FUTEX_WAKE_OP:
ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
break;
+#ifdef CONFIG_ROBUST_FUTEX
+ case FUTEX_REGISTER:
+ ret = futex_register(uaddr, val);
+ break;
+ case FUTEX_DEREGISTER:
+ ret = futex_deregister(uaddr);
+ break;
+ case FUTEX_RECOVER:
+ ret = futex_recover(uaddr);
+ break;
+#endif
default:
ret = -ENOSYS;
}
@@ -901,6 +1293,12 @@ static int __init init(void)
{
unsigned int i;
+#ifdef CONFIG_ROBUST_FUTEX
+ robust_futex_cachep = kmem_cache_create("robust_futex",
+ sizeof(struct futex_robust), 0, 0, NULL, NULL);
+ file_futex_cachep = kmem_cache_create("file_futex",
+ sizeof(struct futex_head), 0, 0, NULL, NULL);
+#endif
register_filesystem(&futex_fs_type);
futex_mnt = kern_mount(&futex_fs_type);
Index: linux-2.6.15/init/Kconfig
===================================================================
--- linux-2.6.15.orig/init/Kconfig
+++ linux-2.6.15/init/Kconfig
@@ -348,6 +348,15 @@ config FUTEX
support for "fast userspace mutexes". The resulting kernel may not
run glibc-based applications correctly.
+config ROBUST_FUTEX
+ bool "Enable robust futex support"
+ depends on FUTEX
+ default y
+ help
+ Enable this option if you want to use robust user space mutexes.
+ Enabling this option slows down the exit path of the kernel for
+ all processes. Robust futexes will run glibc-based applications correctly.
+
config EPOLL
bool "Enable eventpoll support" if EMBEDDED
default y
Index: linux-2.6.15/fs/inode.c
===================================================================
--- linux-2.6.15.orig/fs/inode.c
+++ linux-2.6.15/fs/inode.c
@@ -23,6 +23,7 @@
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/mount.h>
+#include <linux/futex.h>
/*
* This is needed for the following functions:
@@ -175,6 +176,7 @@ void destroy_inode(struct inode *inode)
if (inode_has_buffers(inode))
BUG();
security_inode_free(inode);
+ futex_free_robust_list(inode);
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
[-- Attachment #3: Type: text/plain, Size: 9008 bytes --]
On Jan 18, 2006, at 9:22 PM, Andrew Morton wrote:
> david singleton <dsingleton@mvista.com> wrote:
>>
>> [-ENOCHANGELOG]
>>
>
>> @@ -383,6 +384,7 @@ struct address_space {
>> spinlock_t private_lock; /* for use by the address_space */
>> struct list_head private_list; /* ditto */
>> struct address_space *assoc_mapping; /* ditto */
>> + struct futex_head *robust_head; /* list of robust futexes */
>> } __attribute__((aligned(sizeof(long))));
>
> It's worth putting this field inside CONFIG_ROBUST_FUTEX
>
>> +#else
>> +# define futex_free_robust_list(a) do { } while (0)
>> +# define exit_futex(b) do { } while (0)
>> +#define futex_init_inode(a) do { } while (0)
>> +#endif
>
> Indenting went wonky.
>
>> +/*
>> + * Robust futexes provide a locking mechanism that can be shared
>> between
>> + * user mode processes. The major difference between robust futexes
>> and
>> + * regular futexes is that when the owner of a robust futex dies, the
>> + * next task waiting on the futex will be awakened, will get
>> ownership
>> + * of the futex lock, and will receive the error status EOWNERDEAD.
>> + *
>> + * A robust futex is a 32 bit integer stored in user mode shared
>> memory.
>> + * Bit 31 indicates that there are tasks waiting on the futex.
>> + * Bit 30 indicates that the task that owned the futex has died.
>> + * Bit 29 indicates that the futex is not recoverable and cannot be
>> used.
>> + * Bits 0-28 are the pid of the task that owns the futex lock, or
>> zero if
>> + * the futex is not locked.
>> + */
>
> Nice comment!
>
>> +/**
>> + * futex_free_robust_list - release the list of registered futexes.
>> + * @inode: inode that may be a memory mapped file
>> + *
>> + * Called from dput() when a dentry reference count reaches zero.
>> + * If the dentry is associated with a memory mapped file, then
>> + * release the list of registered robust futexes that are contained
>> + * in that mapping.
>> + */
>> +void futex_free_robust_list(struct inode *inode)
>> +{
>> + struct address_space *mapping;
>> + struct list_head *head;
>> + struct futex_robust *this, *next;
>> + struct futex_head *futex_head = NULL;
>> +
>> + if (inode == NULL)
>> + return;
>
> Is this test needed?
>
> This function is called when a dentry's refcount falls to zero. But
> there
> could be other refs to this inode which might get upset at having their
> robust futexes thrown away. Shouldn't this be based on inode
> destruction
> rather than dentry?
>
>> + mapping = inode->i_mapping;
>> + if (mapping == NULL)
>> + return;
>
> inodes never have a null ->i_mapping
>
>> + if (mapping->robust_head == NULL)
>> + return;
>> +
>> + if (list_empty(&mapping->robust_head->robust_list))
>> + return;
>> +
>> + mutex_lock(&mapping->robust_head->robust_mutex);
>> +
>> + head = &mapping->robust_head->robust_list;
>> + futex_head = mapping->robust_head;
>> +
>> + list_for_each_entry_safe(this, next, head, list) {
>> + list_del(&this->list);
>> + kmem_cache_free(robust_futex_cachep, this);
>> + }
>
> If we're throwing away the entire contents of the list, there's no
> need to
> detach items as we go.
>
>> +/**
>> + * get_shared_uaddr - convert a shared futex_key to a user addr.
>> + * @key: a futex_key that identifies a futex.
>> + * @vma: a vma that may contain the futex
>> + *
>> + * Shared futex_keys identify a futex that is contained in a vma,
>> + * and so may be shared.
>> + * Returns zero if futex is not contained in @vma
>> + */
>> +static unsigned long get_shared_uaddr(union futex_key *key,
>> + struct vm_area_struct *vma)
>> +{
>> + unsigned long uaddr = 0;
>> + unsigned long tmpaddr;
>> + struct address_space *mapping;
>> +
>> + mapping = vma->vm_file->f_mapping;
>> + if (key->shared.inode == mapping->host) {
>> + tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
>> + + (key->shared.offset & ~0x1)
>> + + vma->vm_start;
>> + if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
>> + uaddr = tmpaddr;
>> + }
>> +
>> + return uaddr;
>> +}
>
> What locking guarantees that vma->vm_file->f_mapping continues to
> exist?
> Bear in mind that another thread which shares this thread's files can
> close
> fds, unlink files, munmap files, etc.
This is called under the mmap_sem.
>
>> +/**
>> + * find_owned_futex - find futexes owned by the current task
>> + * @tsk: task that is exiting
>> + * @vma: vma containing robust futexes
>> + * @head: list head for list of robust futexes
>> + * @mutex: mutex that protects the list
>> + *
>> + * Walk the list of registered robust futexes for this @vma,
>> + * setting the %FUTEX_OWNER_DIED flag on those futexes owned
>> + * by the current, exiting task.
>> + */
>> +static void
>> +find_owned_futex(struct task_struct *tsk, struct vm_area_struct *vma,
>> + struct list_head *head, struct mutex *mutex)
>> +{
>> + struct futex_robust *this, *next;
>> + unsigned long uaddr;
>> + int value;
>> +
>> + mutex_lock(mutex);
>> +
>> + list_for_each_entry_safe(this, next, head, list) {
>> +
>> + uaddr = get_futex_uaddr(&this->key, vma);
>> + if (uaddr == 0)
>> + continue;
>> +
>> + mutex_unlock(mutex);
>> + up_read(&tsk->mm->mmap_sem);
>> + get_user(value, (int __user *)uaddr);
>> + if ((value & FUTEX_PID) == tsk->pid) {
>> + value |= FUTEX_OWNER_DIED;
>> + futex_wake(uaddr, 1);
>> + put_user(value, (int *__user)uaddr);
>
> That's a bit unconventional - normally we'd perform the
> other-task-visible
> write and _then_ wake up the other task. What prevents the woken task
> from
> waking then seeing the not-yet-written-to value?
>
>> +void exit_futex(struct task_struct *tsk)
>> +{
>> + struct mm_struct *mm;
>> + struct list_head *robust;
>> + struct vm_area_struct *vma;
>> + struct mutex *mutex;
>> +
>> + mm = current->mm;
>> + if (mm==NULL)
>> + return;
>> +
>> + down_read(&mm->mmap_sem);
>> +
>> + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
>> + if (vma->vm_file == NULL)
>> + continue;
>> +
>> + if (vma->vm_file->f_mapping == NULL)
>> + continue;
>> +
>> + if (vma->vm_file->f_mapping->robust_head == NULL)
>> + continue;
>> +
>> + robust = &vma->vm_file->f_mapping->robust_head->robust_list;
>> +
>> + if (list_empty(robust))
>> + continue;
>> +
>> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>> +
>> + find_owned_futex(tsk, vma, robust, mutex);
>
> The name "find_owned_mutex" is a bit misleading - it implies that it is
> some lookup function which has no side-effects. But find_owned_futex()
> actually makes significant state changes.
>
>> +
>> + if (vma->vm_file && vma->vm_file->f_mapping) {
>> + if (vma->vm_file->f_mapping->robust_head == NULL)
>> + init_robust_list(vma->vm_file->f_mapping, file_futex);
>> + else
>> + kmem_cache_free(file_futex_cachep, file_futex);
>> + head = &vma->vm_file->f_mapping->robust_head->robust_list;
>> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>
> The patch in general does an awful lot of these lengthy pointer chases.
> It's more readable to create temporaries to avoid this. Sometimes it
> generates better code, too.
>
> The kmem_cache_free above is a bit sad. It means that in the common
> case
> we'll allocate a file_futex and then free it again. It's legal to do
> GFP_KERNEL allocations within mmap_sem, so I suggest you switch this to
> allocate-only-if-needed.
>
>> + } else {
>> + ret = -EADDRNOTAVAIL;
>> + kmem_cache_free(robust_futex_cachep, robust);
>> + kmem_cache_free(file_futex_cachep, file_futex);
>> + goto out_unlock;
>> + }
>
> Again, we could have checked whether we needed to allocate these before
> allocating them.
>
>> + if (vma->vm_file && vma->vm_file->f_mapping &&
>> + vma->vm_file->f_mapping->robust_head) {
>> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>> + head = &vma->vm_file->f_mapping->robust_head->robust_list;
>
> Pointer chasing, again...
>
>> +
>> + list_for_each_entry_safe(this, next, head, list) {
>> + if (match_futex (&this->key, &key)) {
> ^
> A stray space got in there.
>
>>
>> +#ifdef CONFIG_ROBUST_FUTEX
>> + robust_futex_cachep = kmem_cache_create("robust_futex",
>> sizeof(struct futex_robust), 0, 0, NULL, NULL);
>> + file_futex_cachep = kmem_cache_create("file_futex", sizeof(struct
>> futex_head), 0, 0, NULL, NULL);
>> +#endif
>
> A bit of 80-column wrapping needed there please.
>
> Are futex_heads likely to be allocated in sufficient volume to justify
> their own slab cache, rather than using kmalloc()? The speed is the
> same -
> if anything, kmalloc() will be faster because its text and data are
> more
> likely to be in CPU cache.
My tests typically use a slab to a slab and a half of futex_heads. In
the real world
I honestly don't know how many will be allocated. Can we leave it in
its own
cache for testing? It sure helps debugging if the entries in the
futex_head cache match
exactly what the test application is using.
>
next prev parent reply other threads:[~2006-01-20 0:47 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-01-14 1:00 [robust-futex-1] futex: robust futex support David Singleton
2006-01-15 0:02 ` Ulrich Drepper
2006-01-15 0:04 ` david singleton
2006-01-15 5:18 ` Ulrich Drepper
2006-01-15 20:00 ` David Singleton
2006-01-17 2:27 ` [robust-futex-3] " david singleton
2006-01-17 17:32 ` Ulrich Drepper
2006-01-17 17:50 ` Ulrich Drepper
2006-01-19 2:26 ` [robust-futex-4] " david singleton
2006-01-19 5:22 ` Andrew Morton
2006-01-20 0:47 ` david singleton [this message]
2006-01-20 17:41 ` Ingo Oeser
2006-01-20 22:18 ` Andrew Morton
2006-01-21 2:30 ` david singleton
2006-01-23 18:20 ` Todd Kneisel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=46429B1C-894E-11DA-ABB2-000A959BB91E@mvista.com \
--to=dsingleton@mvista.com \
--cc=akpm@osdl.org \
--cc=drepper@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).