All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Futex Asynchronous Interface
@ 2002-06-06  7:26 Rusty Russell
  2002-06-02  0:10 ` Pavel Machek
  2002-06-06 16:36 ` Linus Torvalds
  0 siblings, 2 replies; 45+ messages in thread
From: Rusty Russell @ 2002-06-06  7:26 UTC (permalink / raw)
  To: linux-kernel, frankeh; +Cc: alan, torvalds

These two patches (requiring the other patches I sent to the list
which can also be found on my kernel.org page) add the ability to tie
a futex to a file descriptor, for use with poll/select or SIGIO
(required by NGPT).

The method is: open /dev/futex, use sys_futex(FUTEX_AWAIT) to attach
it to a particular futex, then use select or poll (or set the fd up
for sigio signals, and expect a SIGIO).

You need to use FUTEX_AWAIT again after poll succeeds or a SIGIO is
delivered (i.e. it's one-shot).  Calling it while a futex is already
outstanding on that fd forgets about the old futex.

The reason for this method is that it's pretty convenient for
programs, and since each outstanding futex pins a page down, tying it
to a struct file * means we have an implicit limit on pinned pages.

Code below.  Feedback welcome.
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: Waker can unpin page, rather than waiting process
Author: Rusty Russell
Status: Tested in 2.5.20
Depends: Futex/copy-from-user.patch.gz Futex/unpin-page-fix.patch.gz
Depends: Futex/waitq.patch.gz

D: This changes the implementation so that the waker actually unpins
D: the page.  This is preparation for the async interface, where the
D: process which registered interest is not in the kernel.


diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.19104/kernel/futex.c linux-2.5.20.19104.updated/kernel/futex.c
--- linux-2.5.20.19104/kernel/futex.c	Thu Jun  6 17:13:46 2002
+++ linux-2.5.20.19104.updated/kernel/futex.c	Thu Jun  6 17:14:30 2002
@@ -98,11 +98,13 @@
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
 			tell_waiter(this);
+			unpin_page(this->page);
 			num_woken++;
 			if (num_woken >= num) break;
 		}
 	}
 	spin_unlock(&futex_lock);
+	unpin_page(page);
 	return num_woken;
 }
 
@@ -192,9 +194,10 @@
 	}
  out:
 	set_current_state(TASK_RUNNING);
-	/* Were we woken up anyway? */
+	/* Were we woken up anyway?  If so, it unpinned page. */
 	if (!unqueue_me(&q))
 		return 0;
+	unpin_page(page);
 	return ret;
 }
 
@@ -225,6 +228,7 @@
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
+	/* On success, these routines unpin the pages themselves. */
 	head = hash_futex(page, pos_in_page);
 	switch (op) {
 	case FUTEX_WAIT:
@@ -236,7 +240,8 @@
 	default:
 		ret = -EINVAL;
 	}
-	unpin_page(page);
+	if (ret < 0)
+		unpin_page(page);
 
 	return ret;
 }
Name: Asynchronous interface for futexes
Author: Rusty Russell
Status: Tested on 2.5.20
Depends: Futex/comment-fix.patch.gz Futex/copy-from-user.patch.gz
Depends: Futex/no-write-needed.patch.gz Futex/unpin-page-fix.patch.gz
Depends: Futex/waitq.patch.gz Futex/waker-unpin-page.patch.gz

D: This patch adds a FUTEX_AWAIT and /dev/futex, for attaching futexes
D: to file descriptors, which can be used with poll, select or SIGIO.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/include/linux/futex.h linux-2.5.20.15557.updated/include/linux/futex.h
--- linux-2.5.20.15557/include/linux/futex.h	Sat May 25 14:34:59 2002
+++ linux-2.5.20.15557.updated/include/linux/futex.h	Wed Jun  5 22:01:44 2002
@@ -4,5 +4,6 @@
 /* Second argument to futex syscall */
 #define FUTEX_WAIT (0)
 #define FUTEX_WAKE (1)
+#define FUTEX_AWAIT (2)
 
 #endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/kernel/futex.c linux-2.5.20.15557.updated/kernel/futex.c
--- linux-2.5.20.15557/kernel/futex.c	Wed Jun  5 22:01:41 2002
+++ linux-2.5.20.15557.updated/kernel/futex.c	Wed Jun  5 22:02:09 2002
@@ -34,6 +34,10 @@
 #include <linux/highmem.h>
 #include <linux/time.h>
 #include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/poll.h>
 #include <asm/uaccess.h>
 
 /* Simple "sleep if unchanged" interface. */
@@ -41,11 +45,18 @@
 /* FIXME: This may be way too small. --RR */
 #define FUTEX_HASHBITS 6
 
+extern void send_sigio(struct fown_struct *fown, int fd, int band);
+
 /* We use this instead of a normal wait_queue_t, so we can wake only
    the relevent ones (hashed queues may be shared) */
 struct futex_q {
 	struct list_head list;
 	wait_queue_head_t waiters;
+
+	/* For AWAIT, sigio sent using these. */
+	int fd;
+	struct file *filp;
+
 	/* Page struct and offset within it. */
 	struct page *page;
 	unsigned int offset;
@@ -54,6 +65,7 @@
 /* The key for the hash is the address + index + offset within page */
 static struct list_head futex_queues[1<<FUTEX_HASHBITS];
 static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED;
+extern struct file_operations futex_fops;
 
 static inline struct list_head *hash_futex(struct page *page,
 					   unsigned long offset)
@@ -73,9 +85,12 @@
 	page_cache_release(page);
 }
 
+/* Waiter may be sitting in FUTEX_WAIT or poll, or async */
 static inline void tell_waiter(struct futex_q *q)
 {
 	wake_up_all(&q->waiters);
+	if (q->fd != -1)
+		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
 }
 
 static int futex_wake(struct list_head *head,
@@ -113,6 +128,7 @@
 	add_wait_queue(&q->waiters, wait);
 	q->page = page;
 	q->offset = offset;
+	q->fd = -1;
 
 	spin_lock(&futex_lock);
 	list_add_tail(&q->list, head);
@@ -196,6 +212,38 @@
 	return ret;
 }
 
+static int futex_await(struct list_head *head,
+		       struct page *page,
+		       int offset,
+		       int fd)
+{
+	struct file *filp;
+	struct futex_q *q;
+
+	filp = fget(fd);
+	if (!filp || filp->f_op != &futex_fops)
+		return -EBADF;
+	q = filp->private_data;
+
+	spin_lock(&futex_lock);
+	/* Eliminate any old notification, wake any pollers, release page. */
+	if (!list_empty(&q->list)) {
+		list_del(&q->list);
+		wake_up_all(&q->waiters);
+		unpin_page(q->page);
+	}
+
+	q->filp = filp;
+	q->fd = fd;
+	q->page = page;
+	q->offset = offset;
+	list_add_tail(&q->list, head);
+	spin_unlock(&futex_lock);
+	fput(filp);
+
+	return 0;
+}
+
 asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime)
 {
 	int ret;
@@ -229,6 +277,9 @@
 	case FUTEX_WAIT:
 		ret = futex_wait(head, page, pos_in_page, val, uaddr, time);
 		break;
+	case FUTEX_AWAIT:
+		ret = futex_await(head, page, pos_in_page, val);
+		break;
 	case FUTEX_WAKE:
 		ret = futex_wake(head, page, pos_in_page, val);
 		break;
@@ -241,12 +292,68 @@
 	return ret;
 }
 
+static int futex_open(struct inode *inode, struct file *filp)
+{
+	struct futex_q *q;
+
+	q = kmalloc(sizeof(*q), GFP_KERNEL);
+	if (!q)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&q->list);
+	init_waitqueue_head(&q->waiters);
+
+	filp->private_data = q;
+	return 0;
+}
+
+static int futex_close(struct inode *inode, struct file *filp)
+{
+	struct futex_q *q = filp->private_data;
+
+	spin_lock(&futex_lock);
+	if (!list_empty(&q->list)) {
+		list_del(&q->list);
+		unpin_page(q->page);
+		BUG_ON(waitqueue_active(&q->waiters));
+	}
+	spin_unlock(&futex_lock);
+	kfree(filp->private_data);
+	return 0;
+}
+
+/* You need to do a FUTEX_AWAIT to arm this after each successful poll */
+static unsigned int futex_poll(struct file *filp,
+			       struct poll_table_struct *wait)
+{
+	struct futex_q *q = filp->private_data;
+	int ret = 0;
+
+	spin_lock(&futex_lock);
+	if (!list_empty(&q->list))
+		poll_wait(filp, &q->waiters, wait);
+	else
+		ret = POLLIN | POLLRDNORM;
+	spin_unlock(&futex_lock);
+
+	return ret;
+}
+
+static struct file_operations futex_fops = {
+	open:		futex_open,
+	release:	futex_close,
+	poll:		futex_poll,
+};
+
 static int __init init(void)
 {
+	int futex_major;
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++)
 		INIT_LIST_HEAD(&futex_queues[i]);
+	futex_major = devfs_register_chrdev(0, "futex", &futex_fops);
+	devfs_register(NULL, "futex", DEVFS_FL_NONE, futex_major,
+		       0, S_IFCHR | 0666, &futex_fops, NULL);
 	return 0;
 }
 __initcall(init);

^ permalink raw reply	[flat|nested] 45+ messages in thread
* Re: [PATCH] Futex Asynchronous Interface
@ 2002-06-06 16:08 Martin Wirth
  2002-06-06 22:59 ` Rusty Russell
  0 siblings, 1 reply; 45+ messages in thread
From: Martin Wirth @ 2002-06-06 16:08 UTC (permalink / raw)
  To: Rusty Russell, linux-kernel

Hi Rusty,

>if (this->page == page && this->offset == offset) {
> 			list_del_init(i);
> 			tell_waiter(this);
>+			unpin_page(this->page);
> 			num_woken++;
> 			if (num_woken >= num) break;
> 		}
> 	}
> 	spin_unlock(&futex_lock);
>+	unpin_page(page);
> 	return num_woken;

If I understand correctly, you shouldn't unpin the page unless you are sure that
all waiters for a specific (page,offset) combination have been woken up and deleted
from the wait queue. Otherwise a second call to futex_wake may look at the wrong
hash queue or wake the wrong waiters.

In general, I think fast userspace synchronization primitives and asynchronous
notification are different enough that they should be kept logically separate.
Your double use of the hashed wait queues and the syscall makes the code difficult
to grasp and thus open to subtle errors.

Martin

Martin 



^ permalink raw reply	[flat|nested] 45+ messages in thread

end of thread, other threads:[~2002-06-13 16:40 UTC | newest]

Thread overview: 45+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-06-06  7:26 [PATCH] Futex Asynchronous Interface Rusty Russell
2002-06-02  0:10 ` Pavel Machek
2002-06-10  6:57   ` Rusty Russell
2002-06-06 16:36 ` Linus Torvalds
2002-06-06 19:27   ` Alan Cox
2002-06-06 23:21   ` Rusty Russell
2002-06-07  8:33     ` Peter Wächtler
2002-06-08 22:28       ` Linus Torvalds
2002-06-09  9:49         ` Kai Henningsen
2002-06-09 18:09           ` Linus Torvalds
2002-06-09 19:06             ` Thunder from the hill
2002-06-10  6:39             ` Kai Henningsen
2002-06-10  7:55             ` Helge Hafting
2002-06-10 14:10               ` Thunder from the hill
2002-06-10 20:46                 ` Kai Henningsen
2002-06-11 14:14                   ` john slee
2002-06-10 15:11               ` Linus Torvalds
2002-06-11 15:06                 ` Eric W. Biederman
2002-06-10 20:57             ` H. Peter Anvin
2002-06-09 10:07         ` Peter Wächtler
2002-06-09 17:49           ` Linus Torvalds
2002-06-07  9:06   ` Rusty Russell
2002-06-08 22:42     ` Linus Torvalds
2002-06-11  9:15       ` Rusty Russell
2002-06-11 16:53         ` Linus Torvalds
2002-06-12  5:32           ` Rusty Russell
2002-06-12  9:16             ` Peter Wächtler
2002-06-12 14:19               ` Hubertus Franke
2002-06-12 16:50                 ` Peter Wächtler
2002-06-12 18:15                   ` Vladimir Zidar
2002-06-12 15:39               ` Linus Torvalds
2002-06-12 16:29                 ` Peter Wächtler
2002-06-12 16:52                   ` Linus Torvalds
2002-06-12 17:07                     ` Peter Wächtler
2002-06-12 18:32                     ` Saurabh Desai
2002-06-12 20:05                     ` Oliver Xymoron
2002-06-12 20:16                       ` Linus Torvalds
2002-06-13  2:57                     ` Rusty Russell
2002-06-13  9:37                       ` Peter Wächtler
2002-06-13  9:55                         ` Rusty Russell
2002-06-13 16:38                     ` Gabriel Paubert
2002-06-13 16:40                       ` Linus Torvalds
2002-06-13  1:32               ` Rusty Russell
2002-06-06 16:08 Martin Wirth
2002-06-06 22:59 ` Rusty Russell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.