From: david singleton <dsingleton@mvista.com>
To: Andrew Morton <akpm@osdl.org>
Cc: mingo@elte.hu, linux-kernel@vger.kernel.org, drepper@gmail.com
Subject: [robust-futex-5] futex: robust futex support
Date: Thu, 19 Jan 2006 16:47:03 -0800	[thread overview]
Message-ID: <46429B1C-894E-11DA-ABB2-000A959BB91E@mvista.com> (raw)
In-Reply-To: <20060118212256.1553b0ec.akpm@osdl.org>

[-- Attachment #1: Type: text/plain, Size: 586 bytes --]

	Andrew,
	I have incorporated the changes you suggested, with the exception of
the check for a NULL mm in exit_futex.  Apparently a task that exits in
the boot path has a NULL mm and causes the kernel to crash on boot
without this check.

  fs/inode.c            |    2
  include/linux/fs.h    |    4
  include/linux/futex.h |   33 ++++
  init/Kconfig          |    9 +
  kernel/exit.c         |    2
  kernel/futex.c        |  398 ++++++++++++++++++++++++++++++++++++++++++++++++++
  6 files changed, 448 insertions(+)

David
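
For anyone who wants to exercise the new interface from user space, here
is a minimal, untested sketch of how the three new futex operations might
be driven through sys_futex.  The robust_op() wrapper and the mmap-backed
test file are illustrative assumptions; only the FUTEX_* constants come
from the patch below.

	/* Hypothetical test sketch, not part of the patch. */
	#include <fcntl.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#define FUTEX_REGISTER		6
	#define FUTEX_DEREGISTER	7
	#define FUTEX_RECOVER		8
	#define FUTEX_OWNER_DIED	0x40000000
	#define FUTEX_ATTR_SHARED	0x10000000

	static long robust_op(int *uaddr, int op, int val)
	{
		/* sys_futex(uaddr, op, val, timeout, uaddr2, val3) */
		return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
	}

	int main(void)
	{
		int fd = open("/tmp/robust-test", O_RDWR | O_CREAT, 0600);
		int *futex;

		ftruncate(fd, 4096);
		futex = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			     MAP_SHARED, fd, 0);

		/* tell the kernel this word is a robust futex in shared memory */
		robust_op(futex, FUTEX_REGISTER, FUTEX_ATTR_SHARED);

		/* ... lock/unlock by storing/clearing the owner pid in *futex ... */

		if (*futex & FUTEX_OWNER_DIED) {
			/* previous owner died; clear the flag once state is repaired */
			robust_op(futex, FUTEX_RECOVER, 0);
		}

		robust_op(futex, FUTEX_DEREGISTER, 0);
		return 0;
	}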


[-- Attachment #2: robust-futex-5 --]
[-- Type: application/octet-stream, Size: 16275 bytes --]

Signed-off-by: David Singleton <dsingleton@mvista.com>

	Incorporated changes suggested by Andrew Morton

 fs/inode.c            |    2 
 include/linux/fs.h    |    4 
 include/linux/futex.h |   33 ++++
 init/Kconfig          |    9 +
 kernel/exit.c         |    2 
 kernel/futex.c        |  398 ++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 448 insertions(+)

Index: linux-2.6.15/include/linux/fs.h
===================================================================
--- linux-2.6.15.orig/include/linux/fs.h
+++ linux-2.6.15/include/linux/fs.h
@@ -9,6 +9,7 @@
 #include <linux/config.h>
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/futex.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -383,6 +384,9 @@ struct address_space {
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
 	struct address_space	*assoc_mapping;	/* ditto */
+#ifdef CONFIG_ROBUST_FUTEX
+	struct futex_head	*robust_head;	/* list of robust futexes */
+#endif
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
Index: linux-2.6.15/include/linux/futex.h
===================================================================
--- linux-2.6.15.orig/include/linux/futex.h
+++ linux-2.6.15/include/linux/futex.h
@@ -10,6 +10,38 @@
 #define FUTEX_REQUEUE		3
 #define FUTEX_CMP_REQUEUE	4
 #define FUTEX_WAKE_OP		5
+#define FUTEX_REGISTER          6
+#define FUTEX_DEREGISTER        7
+#define FUTEX_RECOVER           8
+
+#define FUTEX_WAITERS				0x80000000
+#define FUTEX_OWNER_DIED			0x40000000
+#define FUTEX_NOT_RECOVERABLE			0x20000000
+#define FUTEX_FLAGS (FUTEX_WAITERS | FUTEX_OWNER_DIED | FUTEX_NOT_RECOVERABLE)
+#define FUTEX_PID                             ~(FUTEX_FLAGS)
+
+#define FUTEX_ATTR_SHARED                       0x10000000
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_ROBUST_FUTEX
+
+struct futex_head {
+	struct list_head robust_list;
+	struct mutex robust_mutex;
+};
+
+struct inode;
+struct task_struct;
+extern void futex_free_robust_list(struct inode *inode);
+extern void exit_futex(struct task_struct *tsk);
+#else
+# define futex_free_robust_list(a)	do { } while (0)
+# define exit_futex(b)			do { } while (0)
+# define futex_init_inode(a)		do { } while (0)
+#endif
 
 long do_futex(unsigned long uaddr, int op, int val,
 		unsigned long timeout, unsigned long uaddr2, int val2,
@@ -41,3 +73,4 @@ long do_futex(unsigned long uaddr, int o
    | ((oparg & 0xfff) << 12) | (cmparg & 0xfff))
 
 #endif
+#endif
Index: linux-2.6.15/kernel/exit.c
===================================================================
--- linux-2.6.15.orig/kernel/exit.c
+++ linux-2.6.15/kernel/exit.c
@@ -31,6 +31,7 @@
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
+#include <linux/futex.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -847,6 +848,7 @@ fastcall NORET_TYPE void do_exit(long co
 		exit_itimers(tsk->signal);
 		acct_process(code);
 	}
+	exit_futex(tsk);
 	exit_mm(tsk);
 
 	exit_sem(tsk);
Index: linux-2.6.15/kernel/futex.c
===================================================================
--- linux-2.6.15.orig/kernel/futex.c
+++ linux-2.6.15/kernel/futex.c
@@ -8,6 +8,9 @@
  *  Removed page pinning, fix privately mapped COW pages and other cleanups
  *  (C) Copyright 2003, 2004 Jamie Lokier
  *
+ *  Robust futexes added by Todd Kneisel
+ *  (C) Copyright 2005, Bull HN.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -40,7 +43,9 @@
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
+#include <linux/mutex.h>
 #include <asm/futex.h>
+#include <asm/uaccess.h>
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
@@ -829,6 +834,382 @@ error:
 	goto out;
 }
 
+#ifdef CONFIG_ROBUST_FUTEX
+/*
+ * Robust futexes provide a locking mechanism that can be shared between
+ * user mode processes. The major difference between robust futexes and
+ * regular futexes is that when the owner of a robust futex dies, the
+ * next task waiting on the futex will be awakened, will get ownership
+ * of the futex lock, and will receive the error status EOWNERDEAD.
+ *
+ * A robust futex is a 32 bit integer stored in user mode shared memory.
+ * Bit 31 indicates that there are tasks waiting on the futex.
+ * Bit 30 indicates that the task that owned the futex has died.
+ * Bit 29 indicates that the futex is not recoverable and cannot be used.
+ * Bits 0-28 are the pid of the task that owns the futex lock, or zero if
+ * the futex is not locked.
+ */
+
+static kmem_cache_t *robust_futex_cachep;
+static kmem_cache_t *file_futex_cachep;
+/*
+ * Used to track registered robust futexes. Attached to linked list in inodes.
+ */
+struct futex_robust {
+	struct list_head list;
+	union futex_key key;
+};
+
+/**
+ * futex_free_robust_list - release the list of registered futexes.
+ * @inode: inode that may be a memory mapped file
+ *
+ * Called from destroy_inode() when an inode is being destroyed.
+ * If the inode is associated with a memory mapped file, then
+ * release the list of registered robust futexes that are contained
+ * in that mapping.
+ */
+void futex_free_robust_list(struct inode *inode)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct list_head *head;
+	struct futex_robust *this, *next;
+	struct futex_head *futex_head = NULL;
+
+	futex_head = mapping->robust_head;
+
+	if (futex_head == NULL)
+		return;
+
+	if (list_empty(&futex_head->robust_list))
+		return;
+
+	mutex_lock(&futex_head->robust_mutex);
+
+	head = &futex_head->robust_list;
+
+	list_for_each_entry_safe(this, next, head, list) {
+		kmem_cache_free(robust_futex_cachep, this);
+	}
+
+	mutex_unlock(&futex_head->robust_mutex);
+
+	kmem_cache_free(file_futex_cachep, futex_head);
+	mapping->robust_head = NULL;
+
+	return;
+}
+
+/**
+ * get_private_uaddr - convert a private futex_key to a user addr
+ * @key: the futex_key that identifies a futex.
+ *
+ * Private futex_keys identify a futex that is in non-shared memory.
+ * Robust futexes should never result in private futex_keys, but keep
+ * this code for completeness.
+ * Returns zero if futex is not contained in current task's mm
+ */
+static unsigned long get_private_uaddr(union futex_key *key)
+{
+	unsigned long uaddr = 0;
+
+	if (key->private.mm == current->mm)
+		uaddr = key->private.uaddr;
+	return uaddr;
+}
+
+/**
+ * get_shared_uaddr - convert a shared futex_key to a user addr.
+ * @key: a futex_key that identifies a futex.
+ * @vma: a vma that may contain the futex
+ *
+ * Shared futex_keys identify a futex that is contained in a vma,
+ * and so may be shared.
+ * Returns zero if futex is not contained in @vma
+ */
+static unsigned long get_shared_uaddr(union futex_key *key,
+				      struct vm_area_struct *vma)
+{
+	unsigned long uaddr = 0;
+	unsigned long tmpaddr;
+	struct address_space *mapping;
+
+	mapping = vma->vm_file->f_mapping;
+	if (key->shared.inode == mapping->host) {
+		tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
+				+ (key->shared.offset & ~0x1)
+				+ vma->vm_start;
+		if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
+			uaddr = tmpaddr;
+	}
+
+	return uaddr;
+}
+
+/**
+ * get_futex_uaddr - convert a futex_key to a user addr.
+ * @key: futex_key that identifies a futex
+ * @vma: vma that may contain the futex
+ *
+ * Converts both shared and private futex_keys.
+ * Returns zero if futex is not contained in @vma or in the current
+ * task's mm.
+ */
+static unsigned long get_futex_uaddr(union futex_key *key,
+				     struct vm_area_struct *vma)
+{
+	unsigned long uaddr;
+
+	if ((key->both.offset & 0x1) == 0)
+		uaddr = get_private_uaddr(key);
+	else
+		uaddr = get_shared_uaddr(key, vma);
+
+	return uaddr;
+}
+
+/**
+ * release_owned_futex - release futexes owned by the exiting task
+ * @tsk: task that is exiting
+ * @vma: vma containing robust futexes
+ * @robust: futex_head holding the list of robust futexes and the
+ *	    mutex that protects it
+ *
+ * Walk the list of registered robust futexes for this @vma,
+ * setting the %FUTEX_OWNER_DIED flag on those futexes owned
+ * by the current, exiting task.
+ */
+static void
+release_owned_futex(struct task_struct *tsk, struct vm_area_struct *vma,
+		    struct futex_head *robust)
+{
+	struct futex_robust *this, *next;
+	struct mutex *mutex = &robust->robust_mutex;
+	struct list_head *head = &robust->robust_list;
+	unsigned long uaddr;
+	int value;
+
+	mutex_lock(mutex);
+
+	list_for_each_entry_safe(this, next, head, list) {
+
+		uaddr = get_futex_uaddr(&this->key, vma);
+		if (uaddr == 0)
+			continue;
+
+		mutex_unlock(mutex);
+		up_read(&tsk->mm->mmap_sem);
+		get_user(value, (int __user *)uaddr);
+		if ((value & FUTEX_PID) == tsk->pid) {
+			value |= FUTEX_OWNER_DIED;
+			put_user(value, (int __user *)uaddr);
+			futex_wake(uaddr, 1);
+		}
+		down_read(&tsk->mm->mmap_sem);
+		mutex_lock(mutex);
+	}
+
+	mutex_unlock(mutex);
+}
+
+/**
+ * exit_futex - futex processing when a task exits.
+ * @tsk: the task that is exiting
+ *
+ * Called from do_exit() when a task exits. Mark all robust futexes
+ * that are owned by the current terminating task as %FUTEX_OWNER_DIED.
+ */
+void exit_futex(struct task_struct *tsk)
+{
+	struct mm_struct *mm = tsk->mm;
+	struct list_head *robust;
+	struct futex_head *head;
+	struct vm_area_struct *vma;
+	struct address_space *addr;
+
+	if (mm == NULL)
+		return;
+
+	down_read(&mm->mmap_sem);
+
+	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+		if (vma->vm_file == NULL)
+			continue;
+
+		addr = vma->vm_file->f_mapping;
+		if (addr == NULL)
+			continue;
+
+		if (addr->robust_head == NULL)
+			continue;
+
+		head = addr->robust_head;
+		robust = &head->robust_list;
+
+		if (list_empty(robust))
+			continue;
+
+		release_owned_futex(tsk, vma, head);
+	}
+
+	up_read(&mm->mmap_sem);
+}
+
+static void init_robust_list(struct address_space *f, struct futex_head *head)
+{
+	f->robust_head = head;
+	INIT_LIST_HEAD(&f->robust_head->robust_list);
+	mutex_init(&f->robust_head->robust_mutex);
+}
+
+/**
+ * futex_register - Record the existence of a robust futex in a vma.
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall) when a robust
+ * futex is created. Looks up the vma that contains the futex and
+ * adds an entry to the list of all robust futexes in the vma.
+ */
+static int futex_register(unsigned long uaddr, unsigned int attr)
+{
+	int ret = 0;
+	struct futex_robust *robust;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct address_space *addr = NULL;
+	struct list_head *head = NULL;
+	struct mutex *mutex = NULL;
+	struct futex_head *file_futex = NULL;
+
+	if ((attr & FUTEX_ATTR_SHARED) == 0)
+		return -EADDRNOTAVAIL;
+
+	down_read(&mm->mmap_sem);
+
+	vma = find_extend_vma(mm, uaddr);
+
+	if (vma && vma->vm_file) {
+		addr = vma->vm_file->f_mapping;
+		if (addr == NULL) {
+			ret = -EADDRNOTAVAIL;
+			goto out;
+		}
+		robust = kmem_cache_alloc(robust_futex_cachep, GFP_KERNEL);
+		if (!robust) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (addr->robust_head == NULL) {
+			file_futex = kmem_cache_alloc(file_futex_cachep, GFP_KERNEL);
+			if (!file_futex) {
+				kmem_cache_free(robust_futex_cachep, robust);
+				ret = -ENOMEM;
+				goto out;
+			}
+			init_robust_list(addr, file_futex);
+		}
+		head = &addr->robust_head->robust_list;
+		mutex = &addr->robust_head->robust_mutex;
+	} else {
+		ret = -EADDRNOTAVAIL;
+		goto out;
+	}
+
+	mutex_lock(mutex);
+	list_add_tail(&robust->list, head);
+	mutex_unlock(mutex);
+
+out:
+	up_read(&mm->mmap_sem);
+
+	return ret;
+}
+
+/**
+ * futex_deregister - Delete robust futex registration from a vma
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall) when a robust
+ * futex is destroyed. Looks up the vma that contains the futex and
+ * removes the futex entry from the list of all robust futexes in
+ * the vma.
+ */
+static int futex_deregister(unsigned long uaddr)
+{
+	union futex_key key;
+	struct mm_struct *mm = current->mm;
+	struct list_head *head = NULL;
+	struct mutex *mutex = NULL;
+	struct vm_area_struct *vma;
+	struct address_space *addr;
+	struct futex_robust *this, *next;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+
+	ret = get_futex_key(uaddr, &key);
+	if (unlikely(ret != 0))
+		goto out;
+	vma = find_extend_vma(mm, uaddr);
+	if (vma && vma->vm_file) {
+		addr = vma->vm_file->f_mapping;
+		if (addr && addr->robust_head) {
+			mutex = &addr->robust_head->robust_mutex;
+			head = &addr->robust_head->robust_list;
+		}
+	}
+	if (head == NULL) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(mutex);
+
+	list_for_each_entry_safe(this, next, head, list) {
+		if (match_futex(&this->key, &key)) {
+			futex_wake(uaddr, 1);
+			list_del(&this->list);
+			kmem_cache_free(robust_futex_cachep, this);
+			break;
+		}
+	}
+
+	mutex_unlock(mutex);
+out:
+	up_read(&mm->mmap_sem);
+	return ret;
+}
+
+/**
+ * futex_recover - Recover a futex after its owner died
+ * @uaddr: user space address of the robust futex
+ *
+ * Called from user space (through sys_futex syscall).
+ * When a task dies while owning a robust futex, the futex is
+ * marked with %FUTEX_OWNER_DIED and ownership is transferred
+ * to the next waiting task. That task can choose to restore
+ * the futex to a useful state by calling this function.
+ */
+static int futex_recover(unsigned long uaddr)
+{
+	int ret = 0;
+	int value = 0;
+	union futex_key key;
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_futex_key(uaddr, &key);
+	up_read(&current->mm->mmap_sem);
+	if (ret != 0)
+		return ret;
+
+	if (get_user(value, (int __user *)uaddr))
+		return -EFAULT;
+
+	value &= ~FUTEX_OWNER_DIED;
+	return put_user(value, (int __user *)uaddr);
+}
+#endif
+
 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
 		unsigned long uaddr2, int val2, int val3)
 {
@@ -854,6 +1235,17 @@ long do_futex(unsigned long uaddr, int o
 	case FUTEX_WAKE_OP:
 		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
 		break;
+#ifdef CONFIG_ROBUST_FUTEX
+	case FUTEX_REGISTER:
+		ret = futex_register(uaddr, val);
+		break;
+	case FUTEX_DEREGISTER:
+		ret = futex_deregister(uaddr);
+		break;
+	case FUTEX_RECOVER:
+		ret = futex_recover(uaddr);
+		break;
+#endif
 	default:
 		ret = -ENOSYS;
 	}
@@ -901,6 +1293,12 @@ static int __init init(void)
 {
 	unsigned int i;
 
+#ifdef CONFIG_ROBUST_FUTEX
+	robust_futex_cachep = kmem_cache_create("robust_futex",
+			       sizeof(struct futex_robust), 0, 0, NULL, NULL);
+	file_futex_cachep = kmem_cache_create("file_futex",
+			       sizeof(struct futex_head), 0, 0, NULL, NULL);
+#endif
 	register_filesystem(&futex_fs_type);
 	futex_mnt = kern_mount(&futex_fs_type);
 
Index: linux-2.6.15/init/Kconfig
===================================================================
--- linux-2.6.15.orig/init/Kconfig
+++ linux-2.6.15/init/Kconfig
@@ -348,6 +348,15 @@ config FUTEX
 	  support for "fast userspace mutexes".  The resulting kernel may not
 	  run glibc-based applications correctly.
 
+config ROBUST_FUTEX
+	bool "Enable robust futex support"
+	depends on FUTEX
+	default y
+	help
+	  Enable this option if you want to use robust user space mutexes.
+	  Enabling this option slows down the exit path of the kernel for
+	  all processes.  Robust futexes let glibc-based applications run correctly.
+
 config EPOLL
 	bool "Enable eventpoll support" if EMBEDDED
 	default y
Index: linux-2.6.15/fs/inode.c
===================================================================
--- linux-2.6.15.orig/fs/inode.c
+++ linux-2.6.15/fs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
 #include <linux/mount.h>
+#include <linux/futex.h>
 
 /*
  * This is needed for the following functions:
@@ -175,6 +176,7 @@ void destroy_inode(struct inode *inode) 
 	if (inode_has_buffers(inode))
 		BUG();
 	security_inode_free(inode);
+	futex_free_robust_list(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
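
The comment block added to kernel/futex.c above describes the layout of
the robust futex word.  For reference, here is a small user-space
decoding sketch; the helper name is illustrative, the masks mirror the
patch's include/linux/futex.h.

	#include <stdint.h>
	#include <stdio.h>

	#define FUTEX_WAITERS		0x80000000u
	#define FUTEX_OWNER_DIED	0x40000000u
	#define FUTEX_NOT_RECOVERABLE	0x20000000u
	#define FUTEX_FLAGS	(FUTEX_WAITERS | FUTEX_OWNER_DIED | \
				 FUTEX_NOT_RECOVERABLE)
	#define FUTEX_PID	(~FUTEX_FLAGS)

	/*
	 * Bit 31: tasks are waiting.  Bit 30: the owner died.
	 * Bit 29: not recoverable.  Bits 0-28: owner pid, 0 if unlocked.
	 */
	static void describe_futex_word(uint32_t word)
	{
		uint32_t pid = word & FUTEX_PID;

		printf("owner pid      : %u%s\n", pid, pid ? "" : " (unlocked)");
		printf("waiters        : %s\n",
		       (word & FUTEX_WAITERS) ? "yes" : "no");
		printf("owner died     : %s\n",
		       (word & FUTEX_OWNER_DIED) ? "yes" : "no");
		printf("not recoverable: %s\n",
		       (word & FUTEX_NOT_RECOVERABLE) ? "yes" : "no");
	}

	int main(void)
	{
		describe_futex_word(0);				/* unlocked */
		describe_futex_word(1234);			/* held by pid 1234 */
		describe_futex_word(1234 | FUTEX_OWNER_DIED);	/* owner exited */
		return 0;
	}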

[-- Attachment #3: Type: text/plain, Size: 9008 bytes --]




On Jan 18, 2006, at 9:22 PM, Andrew Morton wrote:

> david singleton <dsingleton@mvista.com> wrote:
>>
>> [-ENOCHANGELOG]
>>
>
>> @@ -383,6 +384,7 @@ struct address_space {
>>  	spinlock_t		private_lock;	/* for use by the address_space */
>>  	struct list_head	private_list;	/* ditto */
>>  	struct address_space	*assoc_mapping;	/* ditto */
>> +	struct futex_head	*robust_head;	/* list of robust futexes */
>>  } __attribute__((aligned(sizeof(long))));
>
> It's worth putting this field inside CONFIG_ROBUST_FUTEX
>
>> +#else
>> +# define futex_free_robust_list(a)      do { } while (0)
>> +# define exit_futex(b)                  do { } while (0)
>> +#define futex_init_inode(a) 		do { } while (0)
>> +#endif
>
> Indenting went wonky.
>
>> +/*
>> + * Robust futexes provide a locking mechanism that can be shared 
>> between
>> + * user mode processes. The major difference between robust futexes 
>> and
>> + * regular futexes is that when the owner of a robust futex dies, the
>> + * next task waiting on the futex will be awakened, will get 
>> ownership
>> + * of the futex lock, and will receive the error status EOWNERDEAD.
>> + *
>> + * A robust futex is a 32 bit integer stored in user mode shared 
>> memory.
>> + * Bit 31 indicates that there are tasks waiting on the futex.
>> + * Bit 30 indicates that the task that owned the futex has died.
>> + * Bit 29 indicates that the futex is not recoverable and cannot be 
>> used.
>> + * Bits 0-28 are the pid of the task that owns the futex lock, or 
>> zero if
>> + * the futex is not locked.
>> + */
>
> Nice comment!
>
>> +/**
>> + * futex_free_robust_list - release the list of registered futexes.
>> + * @inode: inode that may be a memory mapped file
>> + *
>> + * Called from dput() when a dentry reference count reaches zero.
>> + * If the dentry is associated with a memory mapped file, then
>> + * release the list of registered robust futexes that are contained
>> + * in that mapping.
>> + */
>> +void futex_free_robust_list(struct inode *inode)
>> +{
>> +	struct address_space *mapping;
>> +	struct list_head *head;
>> + 	struct futex_robust *this, *next;
>> +	struct futex_head *futex_head = NULL;
>> +
>> +	if (inode == NULL)
>> +		return;
>
> Is this test needed?
>
> This function is called when a dentry's refcount falls to zero.  But 
> there
> could be other refs to this inode which might get upset at having their
> robust futexes thrown away.  Shouldn't this be based on inode 
> destruction
> rather than dentry?
>
>> +	mapping = inode->i_mapping;
>> +	if (mapping == NULL)
>> +		return;
>
> inodes never have a null ->i_mapping
>
>> +	if (mapping->robust_head == NULL)
>> +		return;
>> +
>> +	if (list_empty(&mapping->robust_head->robust_list))
>> +		return;
>> +
>> +	mutex_lock(&mapping->robust_head->robust_mutex);
>> +
>> +	head = &mapping->robust_head->robust_list;
>> +	futex_head = mapping->robust_head;
>> +
>> +	list_for_each_entry_safe(this, next, head, list) {
>> +		list_del(&this->list);
>> +		kmem_cache_free(robust_futex_cachep, this);
>> +	}
>
> If we're throwing away the entire contents of the list, there's no 
> need to
> detach items as we go.
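
robust-futex-5 above drops the list_del() and just frees each entry in
futex_free_robust_list():

	list_for_each_entry_safe(this, next, head, list) {
		kmem_cache_free(robust_futex_cachep, this);
	}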
>
>> +/**
>> + * get_shared_uaddr - convert a shared futex_key to a user addr.
>> + * @key: a futex_key that identifies a futex.
>> + * @vma: a vma that may contain the futex
>> + *
>> + * Shared futex_keys identify a futex that is contained in a vma,
>> + * and so may be shared.
>> + * Returns zero if futex is not contained in @vma
>> + */
>> +static unsigned long get_shared_uaddr(union futex_key *key,
>> +				      struct vm_area_struct *vma)
>> +{
>> +	unsigned long uaddr = 0;
>> +	unsigned long tmpaddr;
>> +	struct address_space *mapping;
>> +
>> +	mapping = vma->vm_file->f_mapping;
>> +	if (key->shared.inode == mapping->host) {
>> +		tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT)
>> +				+ (key->shared.offset & ~0x1)
>> +				+ vma->vm_start;
>> +		if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end)
>> +			uaddr = tmpaddr;
>> +	}
>> +
>> +	return uaddr;
>> +}
>
> What locking guarantees that vma->vm_file->f_mapping continues to 
> exist?
> Bear in mind that another thread which shares this thread's files can 
> close
> fds, unlink files, munmap files, etc.

This is called under the mmap_sem.

>
>> +/**
>> + * find_owned_futex - find futexes owned by the current task
>> + * @tsk: task that is exiting
>> + * @vma: vma containing robust futexes
>> + * @head: list head for list of robust futexes
>> + * @mutex: mutex that protects the list
>> + *
>> + * Walk the list of registered robust futexes for this @vma,
>> + * setting the %FUTEX_OWNER_DIED flag on those futexes owned
>> + * by the current, exiting task.
>> + */
>> +static void
>> +find_owned_futex(struct task_struct *tsk, struct vm_area_struct *vma,
>> +		 struct list_head *head, struct mutex *mutex)
>> +{
>> +	struct futex_robust *this, *next;
>> + 	unsigned long uaddr;
>> +	int value;
>> +
>> +	mutex_lock(mutex);
>> +
>> +	list_for_each_entry_safe(this, next, head, list) {
>> +
>> +		uaddr = get_futex_uaddr(&this->key, vma);
>> +		if (uaddr == 0)
>> +			continue;
>> +
>> +		mutex_unlock(mutex);
>> +		up_read(&tsk->mm->mmap_sem);
>> +		get_user(value, (int __user *)uaddr);
>> +		if ((value & FUTEX_PID) == tsk->pid) {
>> +			value |= FUTEX_OWNER_DIED;
>> +			futex_wake(uaddr, 1);
>> +			put_user(value, (int *__user)uaddr);
>
> That's a bit unconventional - normally we'd perform the 
> other-task-visible
> write and _then_ wake up the other task.  What prevents the woken task 
> from
> waking then seeing the not-yet-written-to value?
>
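
robust-futex-5 now publishes the updated value before waking a waiter,
so the woken task is guaranteed to observe FUTEX_OWNER_DIED (see
release_owned_futex() above):

	if ((value & FUTEX_PID) == tsk->pid) {
		value |= FUTEX_OWNER_DIED;
		put_user(value, (int __user *)uaddr);
		futex_wake(uaddr, 1);
	}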
>> +void exit_futex(struct task_struct *tsk)
>> +{
>> +	struct mm_struct *mm;
>> +	struct list_head *robust;
>> +	struct vm_area_struct *vma;
>> +	struct mutex *mutex;
>> +
>> +	mm = current->mm;
>> +	if (mm==NULL)
>> +		return;
>> +
>> +	down_read(&mm->mmap_sem);
>> +
>> +	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
>> +		if (vma->vm_file == NULL)
>> +			continue;
>> +
>> +		if (vma->vm_file->f_mapping == NULL)
>> +			continue;
>> +
>> +		if (vma->vm_file->f_mapping->robust_head == NULL)
>> +			continue;
>> +
>> +		robust = &vma->vm_file->f_mapping->robust_head->robust_list;
>> +
>> +		if (list_empty(robust))
>> +			continue;
>> +
>> +		mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>> +
>> +		find_owned_futex(tsk, vma, robust, mutex);
>
> The name "find_owned_mutex" is a bit misleading - it implies that it is
> some lookup function which has no side-effects.  But find_owned_futex()
> actually makes significant state changes.
>
>> +
>> +	if (vma->vm_file && vma->vm_file->f_mapping) {
>> +		if (vma->vm_file->f_mapping->robust_head == NULL)
>> +			init_robust_list(vma->vm_file->f_mapping, file_futex);
>> +		else
>> +			kmem_cache_free(file_futex_cachep, file_futex);
>> +		head = &vma->vm_file->f_mapping->robust_head->robust_list;
>> +		mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>
> The patch in general does an awful lot of these lengthy pointer chases.
> It's more readable to create temporaries to avoid this.  Sometimes it
> generates better code, too.
>
> The kmem_cache_free above is a bit sad.  It means that in the common 
> case
> we'll allocate a file_futex and then free it again.  It's legal to do
> GFP_KERNEL allocations within mmap_sem, so I suggest you switch this to
> allocate-only-if-needed.
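
robust-futex-5 allocates the futex_head only when the mapping does not
already have one, so the common case no longer allocates and then frees
a spare object (see futex_register() above):

	if (addr->robust_head == NULL) {
		file_futex = kmem_cache_alloc(file_futex_cachep, GFP_KERNEL);
		if (!file_futex) {
			kmem_cache_free(robust_futex_cachep, robust);
			ret = -ENOMEM;
			goto out;
		}
		init_robust_list(addr, file_futex);
	}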
>
>> +	} else {
>> +		ret = -EADDRNOTAVAIL;
>> +		kmem_cache_free(robust_futex_cachep, robust);
>> +		kmem_cache_free(file_futex_cachep, file_futex);
>> +		goto out_unlock;
>> +	}
>
> Again, we could have checked whether we needed to allocate these before
> allocating them.
>
>> +	if (vma->vm_file && vma->vm_file->f_mapping &&
>> +	    vma->vm_file->f_mapping->robust_head) {
>> +		mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex;
>> +		head = &vma->vm_file->f_mapping->robust_head->robust_list;
>
> Pointer chasing, again...
>
>> +
>> +	list_for_each_entry_safe(this, next, head, list) {
>> +		if (match_futex (&this->key, &key)) {
>                                ^
>                                 A stray space got in there.
>
>>
>> +#ifdef CONFIG_ROBUST_FUTEX
>> +	robust_futex_cachep = kmem_cache_create("robust_futex", 
>> sizeof(struct futex_robust), 0, 0, NULL, NULL);
>> +	file_futex_cachep = kmem_cache_create("file_futex", sizeof(struct 
>> futex_head), 0, 0, NULL, NULL);
>> +#endif
>
> A bit of 80-column wrapping needed there please.
>
> Are futex_heads likely to be allocated in sufficient volume to justify
> their own slab cache, rather than using kmalloc()?  The speed is the 
> same -
> if anything, kmalloc() will be faster because its text and data are 
> more
> likely to be in CPU cache.

My tests typically use a slab to a slab and a half of futex_heads.  In
the real world I honestly don't know how many will be allocated.  Can
we leave it in its own cache for testing?  It sure helps debugging if
the entries in the futex_head cache match exactly what the test
application is using.
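
If the private caches do turn out to be unnecessary, the switch Andrew
suggests would be a small change along these lines (a sketch only, not
part of the patch):

	/* in futex_register(), instead of kmem_cache_alloc(file_futex_cachep, ...) */
	file_futex = kmalloc(sizeof(struct futex_head), GFP_KERNEL);
	if (!file_futex) {
		kmem_cache_free(robust_futex_cachep, robust);
		ret = -ENOMEM;
		goto out;
	}
	init_robust_list(addr, file_futex);

	/* and the matching release in futex_free_robust_list() */
	kfree(futex_head);
	mapping->robust_head = NULL;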


>

Thread overview: 15+ messages
2006-01-14  1:00 [robust-futex-1] futex: robust futex support David Singleton
2006-01-15  0:02 ` Ulrich Drepper
2006-01-15  0:04   ` david singleton
2006-01-15  5:18     ` Ulrich Drepper
2006-01-15 20:00       ` David Singleton
2006-01-17  2:27       ` [robust-futex-3] " david singleton
2006-01-17 17:32         ` Ulrich Drepper
2006-01-17 17:50         ` Ulrich Drepper
2006-01-19  2:26           ` [robust-futex-4] " david singleton
2006-01-19  5:22             ` Andrew Morton
2006-01-20  0:47               ` david singleton [this message]
2006-01-20 17:41               ` Ingo Oeser
2006-01-20 22:18                 ` Andrew Morton
2006-01-21  2:30                   ` david singleton
2006-01-23 18:20               ` Todd Kneisel
