On Jan 18, 2006, at 9:22 PM, Andrew Morton wrote: > david singleton wrote: >> >> [-ENOCHANGELOG] >> > >> @@ -383,6 +384,7 @@ struct address_space { >> spinlock_t private_lock; /* for use by the address_space */ >> struct list_head private_list; /* ditto */ >> struct address_space *assoc_mapping; /* ditto */ >> + struct futex_head *robust_head; /* list of robust futexes */ >> } __attribute__((aligned(sizeof(long)))); > > It's worth putting this field inside CONFIG_ROBUST_FUTEX > >> +#else >> +# define futex_free_robust_list(a) do { } while (0) >> +# define exit_futex(b) do { } while (0) >> +#define futex_init_inode(a) do { } while (0) >> +#endif > > Indenting went wonky. > >> +/* >> + * Robust futexes provide a locking mechanism that can be shared >> between >> + * user mode processes. The major difference between robust futexes >> and >> + * regular futexes is that when the owner of a robust futex dies, the >> + * next task waiting on the futex will be awakened, will get >> ownership >> + * of the futex lock, and will receive the error status EOWNERDEAD. >> + * >> + * A robust futex is a 32 bit integer stored in user mode shared >> memory. >> + * Bit 31 indicates that there are tasks waiting on the futex. >> + * Bit 30 indicates that the task that owned the futex has died. >> + * Bit 29 indicates that the futex is not recoverable and cannot be >> used. >> + * Bits 0-28 are the pid of the task that owns the futex lock, or >> zero if >> + * the futex is not locked. >> + */ > > Nice comment! > >> +/** >> + * futex_free_robust_list - release the list of registered futexes. >> + * @inode: inode that may be a memory mapped file >> + * >> + * Called from dput() when a dentry reference count reaches zero. >> + * If the dentry is associated with a memory mapped file, then >> + * release the list of registered robust futexes that are contained >> + * in that mapping. >> + */ >> +void futex_free_robust_list(struct inode *inode) >> +{ >> + struct address_space *mapping; >> + struct list_head *head; >> + struct futex_robust *this, *next; >> + struct futex_head *futex_head = NULL; >> + >> + if (inode == NULL) >> + return; > > Is this test needed? > > This function is called when a dentry's refcount falls to zero. But > there > could be other refs to this inode which might get upset at having their > robust futexes thrown away. Shouldn't this be based on inode > destruction > rather than dentry? > >> + mapping = inode->i_mapping; >> + if (mapping == NULL) >> + return; > > inodes never have a null ->i_mapping > >> + if (mapping->robust_head == NULL) >> + return; >> + >> + if (list_empty(&mapping->robust_head->robust_list)) >> + return; >> + >> + mutex_lock(&mapping->robust_head->robust_mutex); >> + >> + head = &mapping->robust_head->robust_list; >> + futex_head = mapping->robust_head; >> + >> + list_for_each_entry_safe(this, next, head, list) { >> + list_del(&this->list); >> + kmem_cache_free(robust_futex_cachep, this); >> + } > > If we're throwing away the entire contents of the list, there's no > need to > detach items as we go. > >> +/** >> + * get_shared_uaddr - convert a shared futex_key to a user addr. >> + * @key: a futex_key that identifies a futex. >> + * @vma: a vma that may contain the futex >> + * >> + * Shared futex_keys identify a futex that is contained in a vma, >> + * and so may be shared. >> + * Returns zero if futex is not contained in @vma >> + */ >> +static unsigned long get_shared_uaddr(union futex_key *key, >> + struct vm_area_struct *vma) >> +{ >> + unsigned long uaddr = 0; >> + unsigned long tmpaddr; >> + struct address_space *mapping; >> + >> + mapping = vma->vm_file->f_mapping; >> + if (key->shared.inode == mapping->host) { >> + tmpaddr = ((key->shared.pgoff - vma->vm_pgoff) << PAGE_SHIFT) >> + + (key->shared.offset & ~0x1) >> + + vma->vm_start; >> + if (tmpaddr >= vma->vm_start && tmpaddr < vma->vm_end) >> + uaddr = tmpaddr; >> + } >> + >> + return uaddr; >> +} > > What locking guarantees that vma->vm_file->f_mapping continues to > exist? > Bear in mind that another thread which shares this thread's files can > close > fds, unlink files, munmap files, etc. This is called under the mmap_sem. > >> +/** >> + * find_owned_futex - find futexes owned by the current task >> + * @tsk: task that is exiting >> + * @vma: vma containing robust futexes >> + * @head: list head for list of robust futexes >> + * @mutex: mutex that protects the list >> + * >> + * Walk the list of registered robust futexes for this @vma, >> + * setting the %FUTEX_OWNER_DIED flag on those futexes owned >> + * by the current, exiting task. >> + */ >> +static void >> +find_owned_futex(struct task_struct *tsk, struct vm_area_struct *vma, >> + struct list_head *head, struct mutex *mutex) >> +{ >> + struct futex_robust *this, *next; >> + unsigned long uaddr; >> + int value; >> + >> + mutex_lock(mutex); >> + >> + list_for_each_entry_safe(this, next, head, list) { >> + >> + uaddr = get_futex_uaddr(&this->key, vma); >> + if (uaddr == 0) >> + continue; >> + >> + mutex_unlock(mutex); >> + up_read(&tsk->mm->mmap_sem); >> + get_user(value, (int __user *)uaddr); >> + if ((value & FUTEX_PID) == tsk->pid) { >> + value |= FUTEX_OWNER_DIED; >> + futex_wake(uaddr, 1); >> + put_user(value, (int *__user)uaddr); > > That's a bit unconventional - normally we'd perform the > other-task-visible > write and _then_ wake up the other task. What prevents the woken task > from > waking then seeing the not-yet-written-to value? > >> +void exit_futex(struct task_struct *tsk) >> +{ >> + struct mm_struct *mm; >> + struct list_head *robust; >> + struct vm_area_struct *vma; >> + struct mutex *mutex; >> + >> + mm = current->mm; >> + if (mm==NULL) >> + return; >> + >> + down_read(&mm->mmap_sem); >> + >> + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { >> + if (vma->vm_file == NULL) >> + continue; >> + >> + if (vma->vm_file->f_mapping == NULL) >> + continue; >> + >> + if (vma->vm_file->f_mapping->robust_head == NULL) >> + continue; >> + >> + robust = &vma->vm_file->f_mapping->robust_head->robust_list; >> + >> + if (list_empty(robust)) >> + continue; >> + >> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex; >> + >> + find_owned_futex(tsk, vma, robust, mutex); > > The name "find_owned_mutex" is a bit misleading - it implies that it is > some lookup function which has no side-effects. But find_owned_futex() > actually makes significant state changes. > >> + >> + if (vma->vm_file && vma->vm_file->f_mapping) { >> + if (vma->vm_file->f_mapping->robust_head == NULL) >> + init_robust_list(vma->vm_file->f_mapping, file_futex); >> + else >> + kmem_cache_free(file_futex_cachep, file_futex); >> + head = &vma->vm_file->f_mapping->robust_head->robust_list; >> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex; > > The patch in general does an awful lot of these lengthy pointer chases. > It's more readable to create temporaries to avoid this. Sometimes it > generates better code, too. > > The kmem_cache_free above is a bit sad. It means that in the common > case > we'll allocate a file_futex and then free it again. It's legal to do > GFP_KERNEL allocations within mmap_sem, so I suggest you switch this to > allocate-only-if-needed. > >> + } else { >> + ret = -EADDRNOTAVAIL; >> + kmem_cache_free(robust_futex_cachep, robust); >> + kmem_cache_free(file_futex_cachep, file_futex); >> + goto out_unlock; >> + } > > Again, we could have checked whether we needed to allocate these before > allocating them. > >> + if (vma->vm_file && vma->vm_file->f_mapping && >> + vma->vm_file->f_mapping->robust_head) { >> + mutex = &vma->vm_file->f_mapping->robust_head->robust_mutex; >> + head = &vma->vm_file->f_mapping->robust_head->robust_list; > > Pointer chasing, again... > >> + >> + list_for_each_entry_safe(this, next, head, list) { >> + if (match_futex (&this->key, &key)) { > ^ > A stray space got in there. > >> >> +#ifdef CONFIG_ROBUST_FUTEX >> + robust_futex_cachep = kmem_cache_create("robust_futex", >> sizeof(struct futex_robust), 0, 0, NULL, NULL); >> + file_futex_cachep = kmem_cache_create("file_futex", sizeof(struct >> futex_head), 0, 0, NULL, NULL); >> +#endif > > A bit of 80-column wrapping needed there please. > > Are futex_heads likely to be allocated in sufficient volume to justify > their own slab cache, rather than using kmalloc()? The speed is the > same - > if anything, kmalloc() will be faster because its text and data are > more > likely to be in CPU cache. My tests typically use a slab to a slab and a half of futex_heads. In the real world I honestly don't know how many will be allocated. Can we leave it in it's own cache for testing? It sure helps debugging if the entries in the futex_head cache match exactly what the test application is using. >