All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: linux-kernel@vger.kernel.org, frankeh@watson.ibm.com
Cc: alan@lxorguk.ukuu.org.uk, torvalds@transmeta.com
Subject: [PATCH] Futex Asynchronous Interface
Date: Thu, 06 Jun 2002 17:26:35 +1000	[thread overview]
Message-ID: <E17FrfH-0006Gt-00@wagner.rustcorp.com.au> (raw)

These two patches (requiring the other patches I sent to the list
which can also be found on my kernel.org page) add the ability to tie
a futex to a file descriptor, for use with poll/select or SIGIO
(required by NGPT).

The method is: open /dev/futex, use sys_futex(FUTEX_AWAIT) to attach
the fd to a particular futex, then use select or poll (or set the fd
up for SIGIO signals, and expect a SIGIO).

You need to use FUTEX_AWAIT again after poll succeeds or SIGIO is
delivered (i.e. it's one-shot).  Calling it while a futex is already
outstanding forgets about the old futex.

The reason for this method is that it's pretty convenient for
programs, and since each outstanding futex pins a page down, tying it
to a struct file * means we have an implicit limit on pinned pages.

Code below.  Feedback welcome.
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: Waker can unpin page, rather than waiting process
Author: Rusty Russell
Status: Tested in 2.5.20
Depends: Futex/copy-from-user.patch.gz Futex/unpin-page-fix.patch.gz
Depends: Futex/waitq.patch.gz

D: This changes the implementation so that the waker actually unpins
D: the page.  This is preparation for the async interface, where the
D: process which registered interest is not in the kernel.


diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.19104/kernel/futex.c linux-2.5.20.19104.updated/kernel/futex.c
--- linux-2.5.20.19104/kernel/futex.c	Thu Jun  6 17:13:46 2002
+++ linux-2.5.20.19104.updated/kernel/futex.c	Thu Jun  6 17:14:30 2002
@@ -98,11 +98,13 @@
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
 			tell_waiter(this);
+			unpin_page(this->page);
 			num_woken++;
 			if (num_woken >= num) break;
 		}
 	}
 	spin_unlock(&futex_lock);
+	unpin_page(page);
 	return num_woken;
 }
 
@@ -192,9 +194,10 @@
 	}
  out:
 	set_current_state(TASK_RUNNING);
-	/* Were we woken up anyway? */
+	/* Were we woken up anyway?  If so, it unpinned page. */
 	if (!unqueue_me(&q))
 		return 0;
+	unpin_page(page);
 	return ret;
 }
 
@@ -225,6 +228,7 @@
 	if (IS_ERR(page))
 		return PTR_ERR(page);
 
+	/* On success, these routines unpin the pages themselves. */
 	head = hash_futex(page, pos_in_page);
 	switch (op) {
 	case FUTEX_WAIT:
@@ -236,7 +240,8 @@
 	default:
 		ret = -EINVAL;
 	}
-	unpin_page(page);
+	if (ret < 0)
+		unpin_page(page);
 
 	return ret;
 }
Name: Asynchronous interface for futexes
Author: Rusty Russell
Status: Tested on 2.5.20
Depends: Futex/comment-fix.patch.gz Futex/copy-from-user.patch.gz
Depends: Futex/no-write-needed.patch.gz Futex/unpin-page-fix.patch.gz
Depends: Futex/waitq.patch.gz Futex/waker-unpin-page.patch.gz

D: This patch adds a FUTEX_AWAIT and /dev/futex, for attaching futexes
D: to file descriptors, which can be used with poll, select or SIGIO.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/include/linux/futex.h linux-2.5.20.15557.updated/include/linux/futex.h
--- linux-2.5.20.15557/include/linux/futex.h	Sat May 25 14:34:59 2002
+++ linux-2.5.20.15557.updated/include/linux/futex.h	Wed Jun  5 22:01:44 2002
@@ -4,5 +4,6 @@
 /* Second argument to futex syscall */
 #define FUTEX_WAIT (0)
 #define FUTEX_WAKE (1)
+#define FUTEX_AWAIT (2)
 
 #endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.20.15557/kernel/futex.c linux-2.5.20.15557.updated/kernel/futex.c
--- linux-2.5.20.15557/kernel/futex.c	Wed Jun  5 22:01:41 2002
+++ linux-2.5.20.15557.updated/kernel/futex.c	Wed Jun  5 22:02:09 2002
@@ -34,6 +34,10 @@
 #include <linux/highmem.h>
 #include <linux/time.h>
 #include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/poll.h>
 #include <asm/uaccess.h>
 
 /* Simple "sleep if unchanged" interface. */
@@ -41,11 +45,18 @@
 /* FIXME: This may be way too small. --RR */
 #define FUTEX_HASHBITS 6
 
+extern void send_sigio(struct fown_struct *fown, int fd, int band);
+
 /* We use this instead of a normal wait_queue_t, so we can wake only
    the relevent ones (hashed queues may be shared) */
 struct futex_q {
 	struct list_head list;
 	wait_queue_head_t waiters;
+
+	/* For AWAIT, sigio sent using these. */
+	int fd;
+	struct file *filp;
+
 	/* Page struct and offset within it. */
 	struct page *page;
 	unsigned int offset;
@@ -54,6 +65,7 @@
 /* The key for the hash is the address + index + offset within page */
 static struct list_head futex_queues[1<<FUTEX_HASHBITS];
 static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED;
+extern struct file_operations futex_fops;
 
 static inline struct list_head *hash_futex(struct page *page,
 					   unsigned long offset)
@@ -73,9 +85,12 @@
 	page_cache_release(page);
 }
 
+/* Waiter may be sitting in FUTEX_WAIT or poll, or async */
 static inline void tell_waiter(struct futex_q *q)
 {
 	wake_up_all(&q->waiters);
+	if (q->fd != -1)
+		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
 }
 
 static int futex_wake(struct list_head *head,
@@ -113,6 +128,7 @@
 	add_wait_queue(&q->waiters, wait);
 	q->page = page;
 	q->offset = offset;
+	q->fd = -1;
 
 	spin_lock(&futex_lock);
 	list_add_tail(&q->list, head);
@@ -196,6 +212,38 @@
 	return ret;
 }
 
+static int futex_await(struct list_head *head,
+		       struct page *page,
+		       int offset,
+		       int fd)
+{
+	struct file *filp;
+	struct futex_q *q;
+
+	filp = fget(fd);
+	if (!filp || filp->f_op != &futex_fops)
+		return -EBADF;
+	q = filp->private_data;
+
+	spin_lock(&futex_lock);
+	/* Eliminate any old notification, wake any pollers, release page. */
+	if (!list_empty(&q->list)) {
+		list_del(&q->list);
+		wake_up_all(&q->waiters);
+		unpin_page(q->page);
+	}
+
+	q->filp = filp;
+	q->fd = fd;
+	q->page = page;
+	q->offset = offset;
+	list_add_tail(&q->list, head);
+	spin_unlock(&futex_lock);
+	fput(filp);
+
+	return 0;
+}
+
 asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime)
 {
 	int ret;
@@ -229,6 +277,9 @@
 	case FUTEX_WAIT:
 		ret = futex_wait(head, page, pos_in_page, val, uaddr, time);
 		break;
+	case FUTEX_AWAIT:
+		ret = futex_await(head, page, pos_in_page, val);
+		break;
 	case FUTEX_WAKE:
 		ret = futex_wake(head, page, pos_in_page, val);
 		break;
@@ -241,12 +292,68 @@
 	return ret;
 }
 
+static int futex_open(struct inode *inode, struct file *filp)
+{
+	struct futex_q *q;
+
+	q = kmalloc(sizeof(*q), GFP_KERNEL);
+	if (!q)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&q->list);
+	init_waitqueue_head(&q->waiters);
+
+	filp->private_data = q;
+	return 0;
+}
+
+static int futex_close(struct inode *inode, struct file *filp)
+{
+	struct futex_q *q = filp->private_data;
+
+	spin_lock(&futex_lock);
+	if (!list_empty(&q->list)) {
+		list_del(&q->list);
+		unpin_page(q->page);
+		BUG_ON(waitqueue_active(&q->waiters));
+	}
+	spin_unlock(&futex_lock);
+	kfree(filp->private_data);
+	return 0;
+}
+
+/* You need to do a FUTEX_AWAIT to arm this after each successful poll */
+static unsigned int futex_poll(struct file *filp,
+			       struct poll_table_struct *wait)
+{
+	struct futex_q *q = filp->private_data;
+	int ret = 0;
+
+	spin_lock(&futex_lock);
+	if (!list_empty(&q->list))
+		poll_wait(filp, &q->waiters, wait);
+	else
+		ret = POLLIN | POLLRDNORM;
+	spin_unlock(&futex_lock);
+
+	return ret;
+}
+
+static struct file_operations futex_fops = {
+	open:		futex_open,
+	release:	futex_close,
+	poll:		futex_poll,
+};
+
 static int __init init(void)
 {
+	int futex_major;
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++)
 		INIT_LIST_HEAD(&futex_queues[i]);
+	futex_major = devfs_register_chrdev(0, "futex", &futex_fops);
+	devfs_register(NULL, "futex", DEVFS_FL_NONE, futex_major,
+		       0, S_IFCHR | 0666, &futex_fops, NULL);
 	return 0;
 }
 __initcall(init);

             reply	other threads:[~2002-06-06  7:23 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-06-06  7:26 Rusty Russell [this message]
2002-06-02  0:10 ` [PATCH] Futex Asynchronous Interface Pavel Machek
2002-06-10  6:57   ` Rusty Russell
2002-06-06 16:36 ` Linus Torvalds
2002-06-06 19:27   ` Alan Cox
2002-06-06 23:21   ` Rusty Russell
2002-06-07  8:33     ` Peter Wächtler
2002-06-08 22:28       ` Linus Torvalds
2002-06-09  9:49         ` Kai Henningsen
2002-06-09 18:09           ` Linus Torvalds
2002-06-09 19:06             ` Thunder from the hill
2002-06-10  6:39             ` Kai Henningsen
2002-06-10  7:55             ` Helge Hafting
2002-06-10 14:10               ` Thunder from the hill
2002-06-10 20:46                 ` Kai Henningsen
2002-06-11 14:14                   ` john slee
2002-06-10 15:11               ` Linus Torvalds
2002-06-11 15:06                 ` Eric W. Biederman
2002-06-10 20:57             ` H. Peter Anvin
2002-06-09 10:07         ` Peter Wächtler
2002-06-09 17:49           ` Linus Torvalds
2002-06-07  9:06   ` Rusty Russell
2002-06-08 22:42     ` Linus Torvalds
2002-06-11  9:15       ` Rusty Russell
2002-06-11 16:53         ` Linus Torvalds
2002-06-12  5:32           ` Rusty Russell
2002-06-12  9:16             ` Peter Wächtler
2002-06-12 14:19               ` Hubertus Franke
2002-06-12 16:50                 ` Peter Wächtler
2002-06-12 18:15                   ` Vladimir Zidar
2002-06-12 15:39               ` Linus Torvalds
2002-06-12 16:29                 ` Peter Wächtler
2002-06-12 16:52                   ` Linus Torvalds
2002-06-12 17:07                     ` Peter Wächtler
2002-06-12 18:32                     ` Saurabh Desai
2002-06-12 20:05                     ` Oliver Xymoron
2002-06-12 20:16                       ` Linus Torvalds
2002-06-13  2:57                     ` Rusty Russell
2002-06-13  9:37                       ` Peter Wächtler
2002-06-13  9:55                         ` Rusty Russell
2002-06-13 16:38                     ` Gabriel Paubert
2002-06-13 16:40                       ` Linus Torvalds
2002-06-13  1:32               ` Rusty Russell
2002-06-06 16:08 Martin Wirth
2002-06-06 22:59 ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E17FrfH-0006Gt-00@wagner.rustcorp.com.au \
    --to=rusty@rustcorp.com.au \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=frankeh@watson.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.