All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch 00/12] pollfs: a naive filesystem for pollable objects
@ 2007-04-01 15:58 davi
  2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

This patch set introduces a new file system for the delivery of pollable
events through file descriptors. To the detriment of debuggability, pollable
objects are a nice adjunct to nonblocking/epoll/event-based servers.

Currently implemented waitable "objects" are: signals, futexes, AIO blocks,
timers and fsync.

Very initial stage, it has only been slightly tested and isn't ready for prime time.
The syscall wire-up patches were skipped; the complete patch set can be found at:

http://haxent.com/~davi/pollfs/

Comments are welcome.

--
Davi Arnaut

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 01/12] pollfs: kernel-side API header
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 02/12] pollfs: file system operations davi
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-api.patch --]
[-- Type: text/plain, Size: 1787 bytes --]

Add pollfs_fs.h header which contains the kernel-side declarations
and auxiliary macros for type safety checks.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/include/linux/pollfs_fs.h
===================================================================
--- /dev/null
+++ linux-2.6/include/linux/pollfs_fs.h
@@ -0,0 +1,57 @@
+/*
+ * pollfs, a naive filesystem for pollable (waitable) files (objects)
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ */
+
+#ifndef _LINUX_POLL_FS_H
+#define _LINUX_POLL_FS_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
+
+#define PFS_CHECK_CALLBACK_1(f, a) (void*)				\
+	(sizeof((f)((typeof(a *))0)))
+
+#define PFS_CHECK_CALLBACK_2(f, a, b) (void*)				\
+	(sizeof((f)((typeof(a *))0, (typeof(b*))0)))
+
+#define PFS_WRITE(func, type, utype)					\
+	(ssize_t (*)(void *, const void __user *))			\
+	(0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_READ(func, type, utype)					\
+	(ssize_t (*)(void *, void __user *))				\
+	(0 ? PFS_CHECK_CALLBACK_2(func, type, utype) : func)
+
+#define PFS_POLL(func, type)						\
+	(int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+#define PFS_RELEASE(func, type)						\
+	(int (*)(void *))(0 ? PFS_CHECK_CALLBACK_1(func, type) : func)
+
+struct pfs_operations {
+	ssize_t (*read)(void *, void __user *);
+	ssize_t (*write)(void *, const void __user *);
+	int (*mmap)(void *, struct vm_area_struct *);
+	int (*poll)(void *);
+	int (*release)(void *);
+	size_t rsize;
+	size_t wsize;
+};
+
+struct pfs_file {
+	void *data;
+	wait_queue_head_t *wait;
+	const struct pfs_operations *fops;
+};
+
+long pfs_open(struct pfs_file *pfs);
+
+#endif	/* __KERNEL__ */
+
+#endif	/* _LINUX_POLL_FS_H */

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 02/12] pollfs: file system operations
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
  2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 03/12] pollfs: asynchronously wait for a signal davi
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-core.patch --]
[-- Type: text/plain, Size: 6437 bytes --]

The key feature of the pollfs file operations is to internally handle
pollable (waitable) resources as files without exporting complex and
bug-prone underlying (VFS) implementation details.

All resource handlers are required to implement the read, write, poll,
release operations and must not block.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/file.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/file.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/pollfs_fs.h>
+
+#define POLLFS_MAGIC 0x9a6afcd
+
+MODULE_LICENSE("GPL");
+
+/* pollfs vfsmount entry */
+static struct vfsmount *pfs_mnt;
+
+/* pollfs file operations */
+static const struct file_operations pfs_fops;
+
+static inline ssize_t
+pfs_read_nonblock(const struct pfs_operations *fops, void *data,
+		  void __user *obj, size_t nr)
+{
+	ssize_t count = 0, res = 0;
+
+	do {
+		res = fops->read(data, obj);
+		if (res)
+			break;
+		count++;
+		obj += fops->rsize;
+	} while (--nr);
+
+	if (count)
+		return count * fops->rsize;
+	else if (res)
+		return res;
+	else
+		return -EAGAIN;
+}
+
+static inline ssize_t
+pfs_read_block(const struct pfs_operations *fops, void *data,
+	       wait_queue_head_t *wait, void __user *obj, size_t nr)
+{
+	ssize_t count;
+
+	do {
+		count = pfs_read_nonblock(fops, data, obj, nr);
+		if (count != -EAGAIN)
+			break;
+		count = wait_event_interruptible((*wait), fops->poll(data));
+	} while (!count);
+
+	return count;
+}
+
+static ssize_t pfs_read(struct file *filp, char __user * buf,
+			size_t count, loff_t * pos)
+{
+	size_t nevents = count;
+	struct pfs_file *pfs = filp->private_data;
+	const struct pfs_operations *fops = pfs->fops;
+
+	if (fops->rsize)
+		nevents /= fops->rsize;
+	else
+		nevents = 1;
+
+	if (!nevents)
+		return -EINVAL;
+
+	if (filp->f_flags & O_NONBLOCK)
+		return pfs_read_nonblock(fops, pfs->data, buf, nevents);
+	else
+		return pfs_read_block(fops, pfs->data, pfs->wait, buf, nevents);
+}
+
+static ssize_t pfs_write(struct file *filp, const char __user * buf,
+			 size_t count, loff_t * ppos)
+{
+	ssize_t res = 0;
+	size_t nevents = count;
+	struct pfs_file *pfs = filp->private_data;
+	const struct pfs_operations *fops = pfs->fops;
+
+	if (fops->wsize)
+		nevents /= fops->wsize;
+	else
+		nevents = 1;
+
+	if (!nevents)
+		return -EINVAL;
+
+	count = 0;
+
+	do {
+		res = fops->write(pfs->data, buf);
+		if (res)
+			break;
+		count++;
+		buf += fops->wsize;
+	} while (--nevents);
+
+	if (count)
+		return count * fops->wsize;
+	else if (res)
+		return res;
+	else
+		return 0;
+}
+
+static unsigned int pfs_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	int ret = 0;
+	struct pfs_file *pfs = filp->private_data;
+
+	poll_wait(filp, pfs->wait, wait);
+
+	if (pfs->fops->poll)
+		ret = pfs->fops->poll(pfs->data);
+	else
+		ret = POLLIN;
+
+	return ret;
+}
+
+static int pfs_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct pfs_file *pfs = filp->private_data;
+
+	return (pfs->fops->mmap) ? pfs->fops->mmap(pfs->data, vma) : -ENODEV;
+}
+
+static int pfs_release(struct inode *inode, struct file *filp)
+{
+	struct pfs_file *pfs = filp->private_data;
+
+	return pfs->fops->release(pfs->data);
+}
+
+static const struct file_operations pfs_fops = {
+	.poll = pfs_poll,
+	.mmap = pfs_mmap,
+	.read = pfs_read,
+	.write = pfs_write,
+	.release = pfs_release
+};
+
+long pfs_open(struct pfs_file *pfs)
+{
+	int fd;
+	struct file *filp;
+	const struct pfs_operations *fops = pfs->fops;
+
+	if (IS_ERR(pfs_mnt))
+		return -ENOSYS;
+
+	if (!fops->poll || (!fops->read || !fops->write))
+		return -EINVAL;
+
+	fd = get_unused_fd();
+	if (fd < 0)
+		return -ENFILE;
+
+	filp = get_empty_filp();
+	if (!filp) {
+		put_unused_fd(fd);
+		return -ENFILE;
+	}
+
+	filp->f_op = &pfs_fops;
+	filp->f_path.mnt = mntget(pfs_mnt);
+	filp->f_path.dentry = dget(pfs_mnt->mnt_root);
+	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+	filp->f_mode = 0;
+	filp->f_flags = 0;
+	filp->private_data = pfs;
+
+	if (fops->read) {
+		filp->f_flags = O_RDONLY;
+		filp->f_mode |= FMODE_READ;
+	}
+
+	if (fops->write) {
+		filp->f_flags = O_WRONLY;
+		filp->f_mode |= FMODE_WRITE;
+	}
+
+	if (fops->write && fops->read)
+		filp->f_flags = O_RDWR;
+
+	fd_install(fd, filp);
+
+	return fd;
+}
+
+EXPORT_SYMBOL(pfs_open);
+
+static int pfs_get_sb(struct file_system_type *fs_type, int flags,
+		      const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_pseudo(fs_type, "pollfs", NULL, POLLFS_MAGIC, mnt);
+}
+
+static struct file_system_type pollfs_type = {
+	.name = "pollfs",
+	.get_sb = pfs_get_sb,
+	.kill_sb = kill_anon_super
+};
+
+static int __init pollfs_init(void)
+{
+	int ret;
+
+	ret = register_filesystem(&pollfs_type);
+	if (ret)
+		return ret;
+
+	pfs_mnt = kern_mount(&pollfs_type);
+	if (IS_ERR(pfs_mnt)) {
+		ret = PTR_ERR(pfs_mnt);
+		unregister_filesystem(&pollfs_type);
+	}
+
+	return ret;
+}
+
+__initcall(pollfs_init);
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -463,6 +463,12 @@ config EPOLL
 	  Disabling this option will cause the kernel to be built without
 	  support for epoll family of system calls.
 
+config POLLFS
+	bool "Enable pollfs support" if EMBEDDED
+	default y
+	help
+	 Pollfs support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y
Index: linux-2.6/fs/Makefile
===================================================================
--- linux-2.6.orig/fs/Makefile
+++ linux-2.6/fs/Makefile
@@ -114,3 +114,4 @@ obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
 obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
 obj-$(CONFIG_GFS2_FS)           += gfs2/
+obj-$(CONFIG_POLLFS)		+= pollfs/
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_POLLFS) += pollfs.o
+pollfs-y := file.o

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 03/12] pollfs: asynchronously wait for a signal
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
  2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
  2007-04-01 15:58 ` [patch 02/12] pollfs: file system operations davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 04/12] pollfs: pollable signal davi
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-signal-wakeup.patch --]
[-- Type: text/plain, Size: 2851 bytes --]

Add a wait queue to the task_struct in order to be able to
associate (wait for) a signal with other resources.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -939,6 +939,7 @@ struct task_struct {
 	sigset_t blocked, real_blocked;
 	sigset_t saved_sigmask;		/* To be restored with TIF_RESTORE_SIGMASK */
 	struct sigpending pending;
+	wait_queue_head_t sigwait;
 
 	unsigned long sas_ss_sp;
 	size_t sas_ss_size;
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -134,6 +134,7 @@ extern struct group_info init_groups;
 		.list = LIST_HEAD_INIT(tsk.pending.list),		\
 		.signal = {{0}}},					\
 	.blocked	= {{0}},					\
+	.sigwait	= __WAIT_QUEUE_HEAD_INITIALIZER(tsk.sigwait),	\
 	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1034,6 +1034,7 @@ static struct task_struct *copy_process(
 
 	clear_tsk_thread_flag(p, TIF_SIGPENDING);
 	init_sigpending(&p->pending);
+	init_waitqueue_head(&p->sigwait);
 
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
Index: linux-2.6/kernel/signal.c
===================================================================
--- linux-2.6.orig/kernel/signal.c
+++ linux-2.6/kernel/signal.c
@@ -224,6 +224,8 @@ fastcall void recalc_sigpending_tsk(stru
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
 	else
 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
+
+	wake_up_interruptible_sync(&t->sigwait);
 }
 
 void recalc_sigpending(void)
@@ -759,6 +761,7 @@ static int send_signal(int sig, struct s
 					      info->si_code >= 0)));
 	if (q) {
 		list_add_tail(&q->list, &signals->list);
+		wake_up_interruptible_sync(&t->sigwait);
 		switch ((unsigned long) info) {
 		case (unsigned long) SEND_SIG_NOINFO:
 			q->info.si_signo = sig;
@@ -1404,6 +1407,7 @@ int send_sigqueue(int sig, struct sigque
 
 	list_add_tail(&q->list, &p->pending.list);
 	sigaddset(&p->pending.signal, sig);
+	wake_up_interruptible_sync(&p->sigwait);
 	if (!sigismember(&p->blocked, sig))
 		signal_wake_up(p, sig == SIGKILL);
 
@@ -1453,6 +1457,7 @@ send_group_sigqueue(int sig, struct sigq
 	list_add_tail(&q->list, &p->signal->shared_pending.list);
 	sigaddset(&p->signal->shared_pending.signal, sig);
 
+	wake_up_interruptible_sync(&p->sigwait);
 	__group_complete_signal(sig, p);
 out:
 	spin_unlock_irqrestore(&p->sighand->siglock, flags);

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 04/12] pollfs: pollable signal
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (2 preceding siblings ...)
  2007-04-01 15:58 ` [patch 03/12] pollfs: asynchronously wait for a signal davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 05/12] pollfs: pollable signal compat code davi
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-signal.patch --]
[-- Type: text/plain, Size: 4043 bytes --]

Retrieve multiple per-process signals through a file descriptor.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/signal.c
@@ -0,0 +1,144 @@
+/*
+ * sigtimedwait4, retrieve multiple signals with one call.
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/signal.h>
+
+struct pfs_signal {
+	sigset_t set;
+	spinlock_t lock;
+	struct task_struct *task;
+	struct pfs_file file;
+};
+
+static void inline sigset_adjust(sigset_t *set)
+{
+	/* SIGKILL and SIGSTOP cannot be caught, blocked, or ignored */
+	sigdelsetmask(set, sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+	/* Signals we don't want to dequeue */
+	signotset(set);
+}
+
+static ssize_t read(struct pfs_signal *evs, siginfo_t __user *infoup)
+{
+	int signo;
+	siginfo_t info;
+
+	signo = dequeue_signal_lock(evs->task, &evs->set, &info);
+	if (!signo)
+		return -EAGAIN;
+
+	if (copy_siginfo_to_user(infoup, &info))
+		return -EFAULT;
+
+	return 0;
+}
+
+static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
+{
+	sigset_t set;
+
+	if (copy_from_user(&set, uset, sizeof(sigset_t)))
+		return -EFAULT;
+
+	sigset_adjust(&set);
+
+	spin_lock_irq(&evs->lock);
+	sigemptyset(&evs->set);
+	sigorsets(&evs->set, &evs->set, &set);
+	spin_unlock_irq(&evs->lock);
+
+	return 0;
+}
+
+static int poll(struct pfs_signal *evs)
+{
+	int ret = 0;
+	sigset_t pending;
+	unsigned long flags;
+
+	rcu_read_lock();
+
+	if (!lock_task_sighand(evs->task, &flags))
+		goto out_unlock;
+
+	sigorsets(&pending, &evs->task->pending.signal,
+		  &evs->task->signal->shared_pending.signal);
+
+	unlock_task_sighand(evs->task, &flags);
+
+	spin_lock_irqsave(&evs->lock, flags);
+	signandsets(&pending, &pending, &evs->set);
+	spin_unlock_irqrestore(&evs->lock, flags);
+
+	if (!sigisemptyset(&pending))
+		ret = POLLIN;
+
+out_unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int release(struct pfs_signal *evs)
+{
+	put_task_struct(evs->task);
+	kfree(evs);
+
+	return 0;
+}
+
+static const struct pfs_operations signal_ops = {
+	.read		= PFS_READ(read, struct pfs_signal, siginfo_t),
+	.write		= PFS_WRITE(write, struct pfs_signal, sigset_t),
+	.poll		= PFS_POLL(poll, struct pfs_signal),
+	.release	= PFS_RELEASE(release, struct pfs_signal),
+	.rsize		= sizeof(siginfo_t),
+	.wsize		= sizeof(sigset_t),
+};
+
+asmlinkage long sys_plsignal(const sigset_t __user *uset)
+{
+	long error;
+	struct pfs_signal *evs;
+
+	evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs)
+		return -ENOMEM;
+
+	if (copy_from_user(&evs->set, uset, sizeof(sigset_t))) {
+		kfree(evs);
+		return -EFAULT;
+	}
+
+	spin_lock_init(&evs->lock);
+
+	evs->task = current;
+	get_task_struct(current);
+
+	sigset_adjust(&evs->set);
+
+	evs->file.data = evs;
+	evs->file.fops = &signal_ops;
+	evs->file.wait = &evs->task->sigwait;
+
+	error = pfs_open(&evs->file);
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -1,2 +1,4 @@
 obj-$(CONFIG_POLLFS) += pollfs.o
 pollfs-y := file.o
+
+pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -469,6 +469,13 @@ config POLLFS
 	help
 	 Pollfs support
 
+config POLLFS_SIGNAL
+	bool "Enable pollfs signal" if EMBEDDED
+	default y
+	depends on POLLFS
+	help
+	 Pollable signal support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 05/12] pollfs: pollable signal compat code
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (3 preceding siblings ...)
  2007-04-01 15:58 ` [patch 04/12] pollfs: pollable signal davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 06/12] pollfs: pollable hrtimers davi
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-signal-compat.patch --]
[-- Type: text/plain, Size: 2727 bytes --]

Compat handlers for the pollable signal operations.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/signal.c
===================================================================
--- linux-2.6.orig/fs/pollfs/signal.c
+++ linux-2.6/fs/pollfs/signal.c
@@ -16,6 +16,7 @@
 #include <linux/poll.h>
 #include <linux/pollfs_fs.h>
 #include <linux/signal.h>
+#include <linux/compat.h>
 
 struct pfs_signal {
 	sigset_t set;
@@ -48,6 +49,24 @@ static ssize_t read(struct pfs_signal *e
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+static ssize_t compat_read(struct pfs_signal *evs,
+			   struct compat_siginfo __user *infoup)
+{
+	int signo;
+	siginfo_t info;
+
+	signo = dequeue_signal_lock(evs->task, &evs->set, &info);
+	if (!signo)
+		return -EAGAIN;
+
+	if (copy_siginfo_to_user32(infoup, &info))
+		return -EFAULT;
+
+	return 0;
+}
+#endif
+
 static ssize_t write(struct pfs_signal *evs, const sigset_t __user *uset)
 {
 	sigset_t set;
@@ -65,6 +84,28 @@ static ssize_t write(struct pfs_signal *
 	return 0;
 }
 
+#ifdef CONFIG_COMPAT
+static ssize_t compat_write(struct pfs_signal *evs,
+			    const compat_sigset_t __user *uset)
+{
+	sigset_t set;
+	compat_sigset_t cset;
+
+	if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+		return -EFAULT;
+
+	sigset_from_compat(&set, &cset);
+	sigset_adjust(&set);
+
+	spin_lock_irq(&evs->lock);
+	sigemptyset(&evs->set);
+	sigorsets(&evs->set, &evs->set, &set);
+	spin_unlock_irq(&evs->lock);
+
+	return 0;
+}
+#endif
+
 static int poll(struct pfs_signal *evs)
 {
 	int ret = 0;
@@ -142,3 +183,47 @@ asmlinkage long sys_plsignal(const sigse
 
 	return error;
 }
+
+#ifdef CONFIG_COMPAT
+static const struct pfs_operations compat_signal_ops = {
+	/* .read        = PFS_READ(compat_read, struct pfs_signal, struct compat_siginfo), */
+	.write		= PFS_WRITE(compat_write, struct pfs_signal, compat_sigset_t),
+	.poll		= PFS_POLL(poll, struct pfs_signal),
+	.release	= PFS_RELEASE(release, struct pfs_signal),
+	/* .rsize       = sizeof(compat_siginfo_t), */
+	.wsize		= sizeof(sigset_t)
+};
+
+asmlinkage long compat_plsignal(const compat_sigset_t __user *uset)
+{
+	long error;
+	compat_sigset_t cset;
+	struct pfs_signal *evs;
+
+	if (copy_from_user(&cset, uset, sizeof(compat_sigset_t)))
+		return -EFAULT;
+
+	evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs)
+		return -ENOMEM;
+
+	spin_lock_init(&evs->lock);
+
+	evs->task = current;
+	get_task_struct(current);
+
+	sigset_from_compat(&evs->set, &cset);
+	sigset_adjust(&evs->set);
+
+	evs->file.data = evs;
+	evs->file.fops = &compat_signal_ops;
+	evs->file.wait = &evs->task->sigwait;
+
+	error = pfs_open(&evs->file);
+
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
+#endif

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 06/12] pollfs: pollable hrtimers
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (4 preceding siblings ...)
  2007-04-01 15:58 ` [patch 05/12] pollfs: pollable signal compat code davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 07/12] pollfs: asynchronous futex wait davi
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-timer.patch --]
[-- Type: text/plain, Size: 5329 bytes --]

Per file descriptor high-resolution timers. A classic unix file interface for
the POSIX timer_(create|settime|gettime|delete) family of functions.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/timer.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/timer.c
@@ -0,0 +1,191 @@
+/*
+ * pollable timers
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/hrtimer.h>
+
+struct pfs_timer {
+	wait_queue_head_t wait;
+	ktime_t interval;
+	spinlock_t lock;
+	atomic_t counter;
+	struct hrtimer timer;
+	struct pfs_file file;
+};
+
+struct hrtimerspec {
+	int flags;
+	clockid_t clock;
+	struct itimerspec expr;
+};
+
+static ssize_t read(struct pfs_timer *evs, struct itimerspec __user * uspec)
+{
+	ktime_t remaining = {};
+	struct itimerspec spec = {};
+	struct hrtimer *timer = &evs->timer;
+
+	/* atomic_dec_not_zero */
+	if (!atomic_add_unless(&evs->counter, -1, 0))
+		return -EAGAIN;
+
+	spin_lock_irq(&evs->lock);
+
+	if (hrtimer_active(timer))
+		remaining = hrtimer_get_remaining(timer);
+
+	if (remaining.tv64 > 0)
+		spec.it_value = ktime_to_timespec(remaining);
+
+	spec.it_interval = ktime_to_timespec(evs->interval);
+
+	spin_unlock_irq(&evs->lock);
+
+	if (copy_to_user(uspec, &spec, sizeof(spec)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static enum hrtimer_restart timer_fn(struct hrtimer *timer)
+{
+	unsigned long flags;
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct pfs_timer *evs = container_of(timer, struct pfs_timer, timer);
+
+	spin_lock_irqsave(&evs->lock, flags);
+
+	if (evs->interval.tv64 > 0)
+		hrtimer_forward(timer, hrtimer_cb_get_time(timer),
+				evs->interval);
+	else
+		ret = HRTIMER_NORESTART;
+
+	spin_unlock_irqrestore(&evs->lock, flags);
+
+	/* timer tick, interval has elapsed */
+	atomic_inc(&evs->counter);
+	wake_up_all(&evs->wait);
+
+	return ret;
+}
+
+static inline void rearm_timer(struct pfs_timer *evs, struct hrtimerspec *spec)
+{
+	struct hrtimer *timer = &evs->timer;
+	enum hrtimer_mode mode = HRTIMER_MODE_REL;
+
+	if (spec->flags & TIMER_ABSTIME)
+		mode = HRTIMER_MODE_ABS;
+
+	do {
+		spin_lock_irq(&evs->lock);
+		if (hrtimer_try_to_cancel(timer) >= 0)
+			break;
+		spin_unlock_irq(&evs->lock);
+		cpu_relax();
+	} while (1);
+
+	hrtimer_init(timer, spec->clock, mode);
+
+	timer->function = timer_fn;
+	timer->expires = timespec_to_ktime(spec->expr.it_value);
+	evs->interval = timespec_to_ktime(spec->expr.it_interval);
+
+	if (timer->expires.tv64)
+		hrtimer_start(timer, timer->expires, mode);
+
+	spin_unlock_irq(&evs->lock);
+}
+
+static inline int spec_invalid(const struct hrtimerspec *spec)
+{
+	if (spec->clock != CLOCK_REALTIME && spec->clock != CLOCK_MONOTONIC)
+		return 1;
+
+	if (!timespec_valid(&spec->expr.it_value) ||
+	    !timespec_valid(&spec->expr.it_interval))
+		return 1;
+
+	return 0;
+}
+
+static ssize_t write(struct pfs_timer *evs,
+		     const struct hrtimerspec __user *uspec)
+{
+	struct hrtimerspec spec;
+
+	if (copy_from_user(&spec, uspec, sizeof(spec)))
+		return -EFAULT;
+
+	if (spec_invalid(&spec))
+		return -EINVAL;
+
+	rearm_timer(evs, &spec);
+
+	return 0;
+}
+
+static int poll(struct pfs_timer *evs)
+{
+	int ret;
+
+	ret = atomic_read(&evs->counter) ? POLLIN : 0;
+
+	return ret;
+}
+
+static int release(struct pfs_timer *evs)
+{
+	hrtimer_cancel(&evs->timer);
+	kfree(evs);
+
+	return 0;
+}
+
+static const struct pfs_operations timer_ops = {
+	.read = PFS_READ(read, struct pfs_timer, struct itimerspec),
+	.write = PFS_WRITE(write, struct pfs_timer, struct hrtimerspec),
+	.poll = PFS_POLL(poll, struct pfs_timer),
+	.release = PFS_RELEASE(release, struct pfs_timer),
+	.rsize = sizeof(struct itimerspec),
+	.wsize = sizeof(struct hrtimerspec),
+};
+
+asmlinkage long sys_pltimer(void)
+{
+	long error;
+	struct pfs_timer *evs;
+
+	evs = kmalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs)
+		return -ENOMEM;
+
+	spin_lock_init(&evs->lock);
+	atomic_set(&evs->counter, 0);
+	init_waitqueue_head(&evs->wait);
+	hrtimer_init(&evs->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+
+	evs->file.data = evs;
+	evs->file.fops = &timer_ops;
+	evs->file.wait = &evs->wait;
+
+	error = pfs_open(&evs->file);
+
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_POLLFS) += pollfs.o
 pollfs-y := file.o
 
 pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
+pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -476,6 +476,13 @@ config POLLFS_SIGNAL
 	help
 	 Pollable signal support
 
+config POLLFS_TIMER
+	bool "Enable pollfs timer" if EMBEDDED
+	default y
+	depends on POLLFS
+	help
+	 Pollable timer support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 07/12] pollfs: asynchronous futex wait
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (5 preceding siblings ...)
  2007-04-01 15:58 ` [patch 06/12] pollfs: pollable hrtimers davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 08/12] pollfs: pollable futex davi
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-futex-async-wait.patch --]
[-- Type: text/plain, Size: 7755 bytes --]

Break apart and export the futex_wait function in order to be able to
associate (wait for) a futex with other resources.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c
+++ linux-2.6/kernel/futex.c
@@ -55,81 +55,6 @@
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 /*
- * Futexes are matched on equal values of this key.
- * The key type depends on whether it's a shared or private mapping.
- * Don't rearrange members without looking at hash_futex().
- *
- * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
-union futex_key {
-	struct {
-		unsigned long pgoff;
-		struct inode *inode;
-		int offset;
-	} shared;
-	struct {
-		unsigned long address;
-		struct mm_struct *mm;
-		int offset;
-	} private;
-	struct {
-		unsigned long word;
-		void *ptr;
-		int offset;
-	} both;
-};
-
-/*
- * Priority Inheritance state:
- */
-struct futex_pi_state {
-	/*
-	 * list of 'owned' pi_state instances - these have to be
-	 * cleaned up in do_exit() if the task exits prematurely:
-	 */
-	struct list_head list;
-
-	/*
-	 * The PI object:
-	 */
-	struct rt_mutex pi_mutex;
-
-	struct task_struct *owner;
-	atomic_t refcount;
-
-	union futex_key key;
-};
-
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
- * we can wake only the relevant ones (hashed queues may be shared).
- *
- * A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
- * The order of wakup is always to make the first condition true, then
- * wake up q->waiters, then make the second condition true.
- */
-struct futex_q {
-	struct list_head list;
-	wait_queue_head_t waiters;
-
-	/* Which hash list lock to use: */
-	spinlock_t *lock_ptr;
-
-	/* Key which the futex is hashed on: */
-	union futex_key key;
-
-	/* For fd, sigio sent using these: */
-	int fd;
-	struct file *filp;
-
-	/* Optional priority inheritance state: */
-	struct futex_pi_state *pi_state;
-	struct task_struct *task;
-};
-
-/*
  * Split the global futex_lock into every hash list lock.
  */
 struct futex_hash_bucket {
@@ -904,8 +829,6 @@ queue_lock(struct futex_q *q, int fd, st
 	q->fd = fd;
 	q->filp = filp;
 
-	init_waitqueue_head(&q->waiters);
-
 	get_key_refs(&q->key);
 	hb = hash_futex(&q->key);
 	q->lock_ptr = &hb->lock;
@@ -938,6 +861,7 @@ static void queue_me(struct futex_q *q, 
 {
 	struct futex_hash_bucket *hb;
 
+	init_waitqueue_head(&q->waiters);
 	hb = queue_lock(q, fd, filp);
 	__queue_me(q, hb);
 }
@@ -1002,24 +926,22 @@ static void unqueue_me_pi(struct futex_q
 	drop_key_refs(&q->key);
 }
 
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val)
 {
 	struct task_struct *curr = current;
-	DECLARE_WAITQUEUE(wait, curr);
 	struct futex_hash_bucket *hb;
-	struct futex_q q;
 	u32 uval;
 	int ret;
 
-	q.pi_state = NULL;
+	q->pi_state = NULL;
  retry:
 	down_read(&curr->mm->mmap_sem);
 
-	ret = get_futex_key(uaddr, &q.key);
+	ret = get_futex_key(uaddr, &q->key);
 	if (unlikely(ret != 0))
 		goto out_release_sem;
 
-	hb = queue_lock(&q, -1, NULL);
+	hb = queue_lock(q, -1, NULL);
 
 	/*
 	 * Access the page AFTER the futex is queued.
@@ -1044,7 +966,7 @@ static int futex_wait(u32 __user *uaddr,
 	ret = get_futex_value_locked(&uval, uaddr);
 
 	if (unlikely(ret)) {
-		queue_unlock(&q, hb);
+		queue_unlock(q, hb);
 
 		/*
 		 * If we would have faulted, release mmap_sem, fault it in and
@@ -1063,14 +985,37 @@ static int futex_wait(u32 __user *uaddr,
 		goto out_unlock_release_sem;
 
 	/* Only actually queue if *uaddr contained val.  */
-	__queue_me(&q, hb);
+	__queue_me(q, hb);
 
 	/*
 	 * Now the futex is queued and we have checked the data, we
-	 * don't want to hold mmap_sem while we sleep.
+	 * don't want to hold mmap_sem while we (might) sleep.
 	 */
 	up_read(&curr->mm->mmap_sem);
 
+	return 0;
+
+ out_unlock_release_sem:
+	queue_unlock(q, hb);
+
+ out_release_sem:
+	up_read(&curr->mm->mmap_sem);
+	return ret;
+}
+
+static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
+{
+	int ret;
+	struct futex_q q;
+	DECLARE_WAITQUEUE(wait, current);
+
+	init_waitqueue_head(&q.waiters);
+
+	ret = futex_wait_queue(&q, uaddr, val);
+
+	if (ret)
+		return ret;
+
 	/*
 	 * There might have been scheduling since the queue_me(), as we
 	 * cannot hold a spinlock across the get_user() in case it
@@ -1106,13 +1051,12 @@ static int futex_wait(u32 __user *uaddr,
 	 * have handled it for us already.
 	 */
 	return -EINTR;
+}
 
- out_unlock_release_sem:
-	queue_unlock(&q, hb);
-
- out_release_sem:
-	up_read(&curr->mm->mmap_sem);
-	return ret;
+/* Return 1 if we were still queued, 0 means we were woken. */
+int futex_wait_unqueue(struct futex_q *q)
+{
+	return unqueue_me(q);
 }
 
 /*
@@ -1142,6 +1086,8 @@ static int futex_lock_pi(u32 __user *uad
 	}
 
 	q.pi_state = NULL;
+
+	init_waitqueue_head(&q.waiters);
  retry:
 	down_read(&curr->mm->mmap_sem);
 
Index: linux-2.6/include/linux/futex.h
===================================================================
--- linux-2.6.orig/include/linux/futex.h
+++ linux-2.6/include/linux/futex.h
@@ -94,12 +94,92 @@ struct robust_list_head {
 #define ROBUST_LIST_LIMIT	2048
 
 #ifdef __KERNEL__
+
+#include <linux/rtmutex.h>
+
+/*
+ * Futexes are matched on equal values of this key.
+ * The key type depends on whether it's a shared or private mapping.
+ * Don't rearrange members without looking at hash_futex().
+ *
+ * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
+ * We set bit 0 to indicate if it's an inode-based key.
+ */
+union futex_key {
+	struct {
+		unsigned long pgoff;
+		struct inode *inode;
+		int offset;
+	} shared;
+	struct {
+		unsigned long address;
+		struct mm_struct *mm;
+		int offset;
+	} private;
+	struct {
+		unsigned long word;
+		void *ptr;
+		int offset;
+	} both;
+};
+
+/*
+ * Priority Inheritance state:
+ */
+struct futex_pi_state {
+	/*
+	 * list of 'owned' pi_state instances - these have to be
+	 * cleaned up in do_exit() if the task exits prematurely:
+	 */
+	struct list_head list;
+
+	/*
+	 * The PI object:
+	 */
+	struct rt_mutex pi_mutex;
+
+	struct task_struct *owner;
+	atomic_t refcount;
+
+	union futex_key key;
+};
+
+/*
+ * We use this hashed waitqueue instead of a normal wait_queue_t, so
+ * we can wake only the relevant ones (hashed queues may be shared).
+ *
+ * A futex_q has a woken state, just like tasks have TASK_RUNNING.
+ * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * The order of wakup is always to make the first condition true, then
+ * wake up q->waiters, then make the second condition true.
+ */
+struct futex_q {
+	struct list_head list;
+	wait_queue_head_t waiters;
+
+	/* Which hash list lock to use: */
+	spinlock_t *lock_ptr;
+
+	/* Key which the futex is hashed on: */
+	union futex_key key;
+
+	/* For fd, sigio sent using these: */
+	int fd;
+	struct file *filp;
+
+	/* Optional priority inheritance state: */
+	struct futex_pi_state *pi_state;
+	struct task_struct *task;
+};
 long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 
 extern int
 handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
 
+extern int futex_wait_queue(struct futex_q *q, u32 __user *uaddr, u32 val);
+extern int futex_wait_unqueue(struct futex_q *q);
+
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
 extern void exit_pi_state_list(struct task_struct *curr);

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 08/12] pollfs: pollable futex
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (6 preceding siblings ...)
  2007-04-01 15:58 ` [patch 07/12] pollfs: asynchronous futex wait davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 09/12] pollfs: check if a AIO event ring is empty davi
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-futex.patch --]
[-- Type: text/plain, Size: 4127 bytes --]

Asynchronously wait for a FUTEX_WAKE operation on a futex if it still contains
a given value. There can be only one futex wait per file descriptor. However,
it can be rearmed (possibly at a different address) at any time.

Building block for pollable semaphores and user-defined events.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -3,3 +3,4 @@ pollfs-y := file.o
 
 pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
 pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
+pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
Index: linux-2.6/fs/pollfs/futex.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/futex.c
@@ -0,0 +1,155 @@
+/*
+ * pollable futex
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/futex.h>
+
+struct futex_event {
+	union {
+		void __user *addr;
+		u64 padding;
+	};
+	int val;
+};
+
+struct pfs_futex {
+	struct futex_q q;
+	struct futex_event fevt;
+	struct mutex mutex;
+	unsigned volatile queued;
+	struct pfs_file file;
+};
+
+static ssize_t read(struct pfs_futex *evs, struct futex_event __user *ufevt)
+{
+	int ret;
+	struct futex_event fevt;
+
+	mutex_lock(&evs->mutex);
+
+	fevt = evs->fevt;
+
+	ret = -EAGAIN;
+
+	if (!evs->queued)
+		ret = -EINVAL;
+	else if (list_empty(&evs->q.list))
+		ret = futex_wait_unqueue(&evs->q);
+
+	switch (ret) {
+	case 1:
+		ret = -EAGAIN;
+	case 0:
+		evs->queued = 0;
+	}
+
+	mutex_unlock(&evs->mutex);
+
+	if (ret < 0)
+		return ret;
+
+	if (copy_to_user(ufevt, &fevt, sizeof(fevt)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static ssize_t write(struct pfs_futex *evs,
+		     const struct futex_event __user *ufevt)
+{
+	int ret;
+	struct futex_event fevt;
+
+	if (copy_from_user(&fevt, ufevt, sizeof(fevt)))
+		return -EFAULT;
+
+	mutex_lock(&evs->mutex);
+
+	if (evs->queued)
+		futex_wait_unqueue(&evs->q);
+
+	ret = futex_wait_queue(&evs->q, fevt.addr, fevt.val);
+
+	if (!ret) {
+		evs->queued = 1;
+		evs->fevt = fevt;
+	} else {
+		evs->queued = 0;
+		evs->fevt.addr = NULL;
+	}
+
+	mutex_unlock(&evs->mutex);
+
+	return ret;
+}
+
+static int poll(struct pfs_futex *evs)
+{
+	int ret;
+
+	while (!mutex_trylock(&evs->mutex))
+		cpu_relax();
+
+	ret = evs->queued && list_empty(&evs->q.list) ? POLLIN : 0;
+
+	mutex_unlock(&evs->mutex);
+
+	return ret;
+}
+
+static int release(struct pfs_futex *evs)
+{
+	if (evs->queued)
+		futex_wait_unqueue(&evs->q);
+
+	mutex_destroy(&evs->mutex);
+
+	kfree(evs);
+
+	return 0;
+}
+
+static const struct pfs_operations futex_ops = {
+	.read = PFS_READ(read, struct pfs_futex, struct futex_event),
+	.write = PFS_WRITE(write, struct pfs_futex, struct futex_event),
+	.poll = PFS_POLL(poll, struct pfs_futex),
+	.release = PFS_RELEASE(release, struct pfs_futex),
+	.rsize = sizeof(struct futex_event),
+	.wsize = sizeof(struct futex_event),
+};
+
+asmlinkage long sys_plfutex(void)
+{
+	long error;
+	struct pfs_futex *evs;
+
+	evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs)
+		return -ENOMEM;
+
+	mutex_init(&evs->mutex);
+	init_waitqueue_head(&evs->q.waiters);
+
+	evs->file.data = evs;
+	evs->file.fops = &futex_ops;
+	evs->file.wait = &evs->q.waiters;
+
+	error = pfs_open(&evs->file);
+
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -483,6 +483,13 @@ config POLLFS_TIMER
 	help
 	 Pollable timer support
 
+config POLLFS_FUTEX
+	bool "Enable pollfs futex" if EMBEDDED
+	default y
+	depends on POLLFS && FUTEX
+	help
+	 Pollable futex support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 09/12] pollfs: check if a AIO event ring is empty
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (7 preceding siblings ...)
  2007-04-01 15:58 ` [patch 08/12] pollfs: pollable futex davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 10/12] pollfs: pollable aio davi
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-aio-ring-empty.patch --]
[-- Type: text/plain, Size: 1493 bytes --]

The aio_ring_empty() function returns true if the AIO event ring has no
elements, false otherwise.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/aio.c
===================================================================
--- linux-2.6.orig/fs/aio.c
+++ linux-2.6/fs/aio.c
@@ -1004,6 +1004,23 @@ put_rq:
 	return ret;
 }
 
+int fastcall aio_ring_empty(struct kioctx *ioctx)
+{
+	struct aio_ring_info *info = &ioctx->ring_info;
+	struct aio_ring *ring;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&ioctx->ctx_lock, flags);
+	ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
+	if (ring->head == ring->tail)
+		ret = 1;
+	kunmap_atomic(ring, KM_IRQ1);
+	spin_unlock_irqrestore(&ioctx->ctx_lock, flags);
+
+	return ret;
+}
+
 /* aio_read_evt
  *	Pull an event off of the ioctx's event ring.  Returns the number of 
  *	events fetched (0 or 1 ;-)
Index: linux-2.6/include/linux/aio.h
===================================================================
--- linux-2.6.orig/include/linux/aio.h
+++ linux-2.6/include/linux/aio.h
@@ -202,6 +202,7 @@ extern unsigned aio_max_size;
 
 extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb));
 extern int FASTCALL(aio_put_req(struct kiocb *iocb));
+extern int FASTCALL(aio_ring_empty(struct kioctx *ioctx));
 extern void FASTCALL(kick_iocb(struct kiocb *iocb));
 extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2));
 extern void FASTCALL(__put_ioctx(struct kioctx *ctx));

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 10/12] pollfs: pollable aio
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (8 preceding siblings ...)
  2007-04-01 15:58 ` [patch 09/12] pollfs: check if a AIO event ring is empty davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 11/12] pollfs: asynchronous workqueue davi
  2007-04-01 15:58 ` [patch 12/12] pollfs: pollable fsync davi
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-aio.patch --]
[-- Type: text/plain, Size: 3103 bytes --]

Submit, retrieve, or poll aio requests for completion through a
file descriptor. Untested.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -4,3 +4,4 @@ pollfs-y := file.o
 pollfs-$(CONFIG_POLLFS_SIGNAL) += signal.o
 pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
 pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
+pollfs-$(CONFIG_POLLFS_AIO) += aio.o
Index: linux-2.6/fs/pollfs/aio.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/aio.c
@@ -0,0 +1,103 @@
+/*
+ * pollable aio
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/aio.h>
+#include <linux/syscalls.h>
+
+struct pfs_aio {
+	struct kioctx *ioctx;
+	struct pfs_file file;
+};
+
+static ssize_t read(struct pfs_aio *evs, struct io_event __user *uioevt)
+{
+	int ret;
+
+	ret = sys_io_getevents(evs->ioctx->user_id, 0, 1, uioevt, NULL);
+
+	if (!ret)
+		ret = -EAGAIN;
+	else if (ret > 0)
+		ret = 0;
+
+	return ret;
+}
+
+static ssize_t write(struct pfs_aio *evs, const struct iocb __user *uiocb)
+{
+	struct iocb iocb;
+
+	if (copy_from_user(&iocb, uiocb, sizeof(iocb)))
+		return -EFAULT;
+
+	return io_submit_one(evs->ioctx, uiocb, &iocb);
+}
+
+static int poll(struct pfs_aio *evs)
+{
+	int ret;
+
+	ret = aio_ring_empty(evs->ioctx) ? 0 : POLLIN;
+
+	return ret;
+}
+
+static int release(struct pfs_aio *evs)
+{
+	put_ioctx(evs->ioctx);
+
+	kfree(evs);
+
+	return 0;
+}
+
+static const struct pfs_operations aio_ops = {
+	.read = PFS_READ(read, struct pfs_aio, struct io_event),
+	.write = PFS_WRITE(write, struct pfs_aio, struct iocb),
+	.poll = PFS_POLL(poll, struct pfs_aio),
+	.release = PFS_RELEASE(release, struct pfs_aio),
+	.rsize = sizeof(struct io_event),
+	.wsize = sizeof(struct iocb),
+};
+
+asmlinkage long sys_plaio(aio_context_t ctx)
+{
+	long error;
+	struct pfs_aio *evs;
+	struct kioctx *ioctx = lookup_ioctx(ctx);
+
+	if (!ioctx)
+		return -EINVAL;
+
+	evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs) {
+		put_ioctx(ioctx);
+		return -ENOMEM;
+	}
+
+	evs->ioctx = ioctx;
+
+	evs->file.data = evs;
+	evs->file.fops = &aio_ops;
+	evs->file.wait = &ioctx->wait;
+
+	error = pfs_open(&evs->file);
+
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -490,6 +490,13 @@ config POLLFS_FUTEX
 	help
 	 Pollable futex support
 
+config POLLFS_AIO
+	bool "Enable pollfs aio" if EMBEDDED
+	default y
+	depends on POLLFS
+	help
+	 Pollable aio support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 11/12] pollfs: asynchronous workqueue
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (9 preceding siblings ...)
  2007-04-01 15:58 ` [patch 10/12] pollfs: pollable aio davi
@ 2007-04-01 15:58 ` davi
  2007-04-01 15:58 ` [patch 12/12] pollfs: pollable fsync davi
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-async-workqueue.patch --]
[-- Type: text/plain, Size: 8960 bytes --]

Asynchronously run work items.

If the worker thread blocks while the kernel executes the work function,
a new worker thread is created (if one is not already available) to handle
the remaining workqueue items.

Various errors and resource limitations are not yet handled.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/include/linux/workqueue.h
===================================================================
--- linux-2.6.orig/include/linux/workqueue.h
+++ linux-2.6/include/linux/workqueue.h
@@ -25,7 +25,8 @@ struct work_struct {
 	atomic_long_t data;
 #define WORK_STRUCT_PENDING 0		/* T if work item pending execution */
 #define WORK_STRUCT_NOAUTOREL 1		/* F if work item automatically released on exec */
-#define WORK_STRUCT_FLAG_MASK (3UL)
+#define WORK_STRUCT_ASYNC 2		/* T if work item can be executed asynchronously */
+#define WORK_STRUCT_FLAG_MASK (7UL)
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
@@ -171,6 +172,7 @@ extern int FASTCALL(queue_work(struct wo
 extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay));
 extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	struct delayed_work *work, unsigned long delay);
+extern int FASTCALL(queue_async_work(struct workqueue_struct *wq, struct work_struct *work));
 extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
 
 extern int FASTCALL(schedule_work(struct work_struct *work));
Index: linux-2.6/kernel/workqueue.c
===================================================================
--- linux-2.6.orig/kernel/workqueue.c
+++ linux-2.6/kernel/workqueue.c
@@ -14,6 +14,7 @@
  *   Theodore Ts'o <tytso@mit.edu>
  *
  * Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
+ * Asynchronous workqueue by Davi E. M. Arnaut <davi.arnaut@gmail.com>
  */
 
 #include <linux/module.h>
@@ -60,6 +61,8 @@ struct cpu_workqueue_struct {
 	int run_depth;		/* Detect run_workqueue() recursion depth */
 
 	int freezeable;		/* Freeze the thread during suspend */
+
+	struct list_head threadlist;
 } ____cacheline_aligned;
 
 /*
@@ -297,9 +300,27 @@ int queue_delayed_work_on(int cpu, struc
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
+/**
+ * queue_async_work - queue an asynchronous work on a workqueue
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to the CPU it was submitted, but there is no
+ * guarantee that it will be processed by that CPU.
+ */
+int fastcall queue_async_work(struct workqueue_struct *wq, struct work_struct *work)
+{
+	set_bit(WORK_STRUCT_ASYNC, work_data_bits(work));
+
+	return queue_work(wq, work);
+}
+EXPORT_SYMBOL_GPL(queue_async_work);
+
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
-	unsigned long flags;
+	unsigned long flags, async;
 
 	/*
 	 * Keep taking off work from the queue until
@@ -324,8 +345,18 @@ static void run_workqueue(struct cpu_wor
 		BUG_ON(get_wq_data(work) != cwq);
 		if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
 			work_release(work);
+
+		async = test_bit(WORK_STRUCT_ASYNC, work_data_bits(work));
+		if (unlikely(async))
+			current->cwq = cwq;
+
 		f(work);
 
+		if (current->cwq)
+			current->cwq = NULL;
+		else if (async)
+			async++;
+
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
 					"%s/0x%08x/%d\n",
@@ -340,6 +371,17 @@ static void run_workqueue(struct cpu_wor
 		spin_lock_irqsave(&cwq->lock, flags);
 		cwq->remove_sequence++;
 		wake_up(&cwq->work_done);
+
+		if (async > 1) {
+			if (cwq->thread) {
+				list_add_tail(&current->cwq_entry, &cwq->threadlist);
+				spin_unlock_irqrestore(&cwq->lock, flags);
+				schedule();
+				spin_lock_irqsave(&cwq->lock, flags);
+			}
+			else
+				cwq->thread = current;
+		}
 	}
 	cwq->run_depth--;
 	spin_unlock_irqrestore(&cwq->lock, flags);
@@ -467,6 +509,7 @@ static struct task_struct *create_workqu
 	cwq->remove_sequence = 0;
 	cwq->freezeable = freezeable;
 	INIT_LIST_HEAD(&cwq->worklist);
+	INIT_LIST_HEAD(&cwq->threadlist);
 	init_waitqueue_head(&cwq->more_work);
 	init_waitqueue_head(&cwq->work_done);
 
@@ -534,15 +577,19 @@ static void cleanup_workqueue_thread(str
 {
 	struct cpu_workqueue_struct *cwq;
 	unsigned long flags;
-	struct task_struct *p;
+	struct task_struct *p, *tmp;
+	LIST_HEAD(threadlist);
 
 	cwq = per_cpu_ptr(wq->cpu_wq, cpu);
 	spin_lock_irqsave(&cwq->lock, flags);
 	p = cwq->thread;
 	cwq->thread = NULL;
+	list_splice_init(&cwq->threadlist, &threadlist);
 	spin_unlock_irqrestore(&cwq->lock, flags);
 	if (p)
 		kthread_stop(p);
+	list_for_each_entry_safe(p, tmp, &threadlist, cwq_entry)
+		kthread_stop(p);
 }
 
 /**
@@ -811,6 +858,68 @@ static int __devinit workqueue_cpu_callb
 	return NOTIFY_OK;
 }
 
+static void create_cpu_worker(struct cpu_workqueue_struct *cwq)
+{
+	unsigned long flags;
+	struct task_struct *p;
+	struct workqueue_struct *wq = cwq->wq;
+	int cpu = first_cpu(current->cpus_allowed);
+
+	mutex_lock(&workqueue_mutex);
+	if (is_single_threaded(wq))
+		p = kthread_create(worker_thread, cwq, "%s", wq->name);
+	else
+		p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu);
+
+	if (IS_ERR(p))
+		/* oh well, there isn't much we can do anyway. */
+		goto unlock;
+
+	kthread_bind(p, cpu);
+
+	spin_lock_irqsave(&cwq->lock, flags);
+	if (!cwq->thread)
+		wake_up_process(p);
+	else
+		list_add_tail(&p->cwq_entry, &cwq->threadlist);
+	spin_unlock_irqrestore(&cwq->lock, flags);
+
+unlock:
+	mutex_unlock(&workqueue_mutex);
+}
+
+static inline void wake_up_cpu_worker(struct cpu_workqueue_struct *cwq)
+{
+	struct task_struct *worker = list_entry(cwq->threadlist.next,
+						struct task_struct, cwq_entry);
+
+	list_del_init(cwq->threadlist.next);
+
+	cwq->thread = worker;
+
+	wake_up_process(worker);
+}
+
+void schedule_workqueue(struct task_struct *task)
+{
+	struct cpu_workqueue_struct *cwq = task->cwq;
+	unsigned long flags;
+
+	task->cwq = NULL;
+
+	spin_lock_irqsave(&cwq->lock, flags);
+	if (cwq->thread == task) {
+		if (!list_empty(&cwq->threadlist))
+			wake_up_cpu_worker(cwq);
+		else
+			task = cwq->thread = NULL;
+	}
+	spin_unlock_irqrestore(&cwq->lock, flags);
+
+	if (!task)
+		create_cpu_worker(cwq);
+}
+
 void init_workqueues(void)
 {
 	singlethread_cpu = first_cpu(cpu_possible_map);
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -843,6 +843,9 @@ struct task_struct {
 
 	struct mm_struct *mm, *active_mm;
 
+	/* (asynchronous) cpu workqueue */
+	void *cwq;
+	struct list_head cwq_entry;
 /* task state */
 	struct linux_binfmt *binfmt;
 	long exit_state;
@@ -1409,6 +1412,7 @@ extern int disallow_signal(int);
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
+extern void schedule_workqueue(struct task_struct *);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -3305,6 +3305,12 @@ asmlinkage void __sched schedule(void)
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
+	/* asynchronous queue worker */
+	if (unlikely(current->cwq))
+		/* only if it's a voluntary sleep */
+		if (!(preempt_count() & PREEMPT_ACTIVE) && current->state != TASK_RUNNING)
+			schedule_workqueue(current);
+
 need_resched:
 	preempt_disable();
 	prev = current;
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -112,6 +112,7 @@ extern struct group_info init_groups;
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children),		\
 	.ptrace_list	= LIST_HEAD_INIT(tsk.ptrace_list),		\
+	.cwq_entry	= LIST_HEAD_INIT(tsk.cwq_entry),		\
 	.real_parent	= &tsk,						\
 	.parent		= &tsk,						\
 	.children	= LIST_HEAD_INIT(tsk.children),			\
Index: linux-2.6/kernel/fork.c
===================================================================
--- linux-2.6.orig/kernel/fork.c
+++ linux-2.6/kernel/fork.c
@@ -1173,6 +1173,7 @@ static struct task_struct *copy_process(
 	INIT_LIST_HEAD(&p->thread_group);
 	INIT_LIST_HEAD(&p->ptrace_children);
 	INIT_LIST_HEAD(&p->ptrace_list);
+	INIT_LIST_HEAD(&p->cwq_entry);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [patch 12/12] pollfs: pollable fsync
  2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
                   ` (10 preceding siblings ...)
  2007-04-01 15:58 ` [patch 11/12] pollfs: asynchronous workqueue davi
@ 2007-04-01 15:58 ` davi
  11 siblings, 0 replies; 13+ messages in thread
From: davi @ 2007-04-01 15:58 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Davide Libenzi, Linus Torvalds, Andrew Morton

[-- Attachment #1: pollfs-sync-file.patch --]
[-- Type: text/plain, Size: 4473 bytes --]

Pollable asynchronous fsync() using a global workqueue. This may be extended
to use sync_file_range() in the future.

Signed-off-by: Davi E. M. Arnaut <davi@haxent.com.br>
---

Index: linux-2.6/fs/pollfs/Makefile
===================================================================
--- linux-2.6.orig/fs/pollfs/Makefile
+++ linux-2.6/fs/pollfs/Makefile
@@ -5,3 +5,4 @@ pollfs-$(CONFIG_POLLFS_SIGNAL) += signal
 pollfs-$(CONFIG_POLLFS_TIMER) += timer.o
 pollfs-$(CONFIG_POLLFS_FUTEX) += futex.o
 pollfs-$(CONFIG_POLLFS_AIO) += aio.o
+pollfs-$(CONFIG_POLLFS_SYNC) += sync.o
Index: linux-2.6/fs/pollfs/sync.c
===================================================================
--- /dev/null
+++ linux-2.6/fs/pollfs/sync.c
@@ -0,0 +1,173 @@
+/*
+ * pollable fsync
+ *
+ * Copyright (C) 2007 Davi E. M. Arnaut
+ *
+ * Licensed under the GNU GPL. See the file COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/pollfs_fs.h>
+#include <linux/workqueue.h>
+#include <linux/file.h>
+
+struct sync_file {
+	int fd;
+	int datasync;
+	long result;
+};
+
+struct pfs_sync {
+	struct file *filp;
+	struct sync_file sync;
+	struct work_struct work;
+	struct mutex mutex;
+	enum {
+		WORK_REST,
+		WORK_BUSY,
+		WORK_DONE,
+	} status;
+	wait_queue_head_t wait;
+	struct pfs_file file;
+};
+
+static struct workqueue_struct *sync_wq;
+
+static void sync_file_work(struct work_struct *work)
+{
+	struct pfs_sync *evs = container_of(work, struct pfs_sync, work);
+
+	evs->sync.result = do_fsync(evs->filp, evs->sync.datasync);
+
+	fput(evs->filp);
+	evs->status = WORK_DONE;
+
+	wake_up_all(&evs->wait);
+}
+
+static ssize_t read(struct pfs_sync *evs, struct sync_file __user *usync)
+{
+	int ret = 0;
+	struct sync_file sync = {};
+
+	mutex_lock(&evs->mutex);
+	switch (evs->status) {
+	case WORK_REST:
+		ret = -EINVAL; break;
+	case WORK_BUSY:
+		ret = -EAGAIN; break;
+	case WORK_DONE:
+		evs->status = WORK_REST;
+		sync = evs->sync;
+		break;
+	}
+	mutex_unlock(&evs->mutex);
+
+	if (ret)
+		return ret;
+
+	if (copy_to_user(usync, &sync, sizeof(sync)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static ssize_t write(struct pfs_sync *evs, const struct sync_file __user *usync)
+{
+	int ret = 0;
+	struct file *filp;
+	struct sync_file sync;
+
+	if (copy_from_user(&sync, usync, sizeof(sync)))
+		return -EFAULT;
+
+	filp = fget(sync.fd);
+	if (!filp)
+		return -EINVAL;
+
+	mutex_lock(&evs->mutex);
+	if (evs->status != WORK_REST)
+		ret = -EAGAIN;
+	else {
+		evs->filp = filp;
+		evs->status = WORK_BUSY;
+		queue_async_work(sync_wq, &evs->work);
+	}
+	mutex_unlock(&evs->mutex);
+
+	return ret;
+}
+
+static int poll(struct pfs_sync *evs)
+{
+	int ret = 0;
+
+	if (evs->status == WORK_DONE)
+		ret = POLLIN;
+	else if (evs->status == WORK_REST)
+		ret = POLLOUT;
+
+	return ret;
+}
+
+static int release(struct pfs_sync *evs)
+{
+	wait_event(evs->wait, evs->status != WORK_BUSY);
+
+	kfree(evs);
+
+	return 0;
+}
+
+static const struct pfs_operations sync_ops = {
+	.read = PFS_READ(read, struct pfs_sync, struct sync_file),
+	.write = PFS_WRITE(write, struct pfs_sync, struct sync_file),
+	.poll = PFS_POLL(poll, struct pfs_sync),
+	.release = PFS_RELEASE(release, struct pfs_sync),
+	.rsize = sizeof(struct sync_file),
+	.wsize = sizeof(struct sync_file),
+};
+
+asmlinkage long sys_plsync(void)
+{
+	long error;
+	struct pfs_sync *evs;
+
+	if (!sync_wq)
+		return -ENOSYS;
+
+	evs = kzalloc(sizeof(*evs), GFP_KERNEL);
+	if (!evs)
+		return -ENOMEM;
+
+	evs->status = WORK_REST;
+	mutex_init(&evs->mutex);
+	init_waitqueue_head(&evs->wait);
+	INIT_WORK(&evs->work, sync_file_work);
+
+	evs->file.data = evs;
+	evs->file.fops = &sync_ops;
+	evs->file.wait = &evs->wait;
+
+	error = pfs_open(&evs->file);
+
+	if (error < 0)
+		release(evs);
+
+	return error;
+}
+
+static int __init init(void)
+{
+	sync_wq = create_workqueue("syncd");
+	WARN_ON(!sync_wq);
+	return 0;
+}
+
+__initcall(init);
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -497,6 +497,13 @@ config POLLFS_AIO
 	help
 	 Pollable aio support
 
+config POLLFS_SYNC
+	bool "Enable pollfs file sync" if EMBEDDED
+	default y
+	depends on POLLFS
+	help
+	 Pollable file sync support
+
 config SHMEM
 	bool "Use full shmem filesystem" if EMBEDDED
 	default y

-- 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2007-04-01 16:06 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-04-01 15:58 [patch 00/12] pollfs: a naive filesystem for pollable objects davi
2007-04-01 15:58 ` [patch 01/12] pollfs: kernel-side API header davi
2007-04-01 15:58 ` [patch 02/12] pollfs: file system operations davi
2007-04-01 15:58 ` [patch 03/12] pollfs: asynchronously wait for a signal davi
2007-04-01 15:58 ` [patch 04/12] pollfs: pollable signal davi
2007-04-01 15:58 ` [patch 05/12] pollfs: pollable signal compat code davi
2007-04-01 15:58 ` [patch 06/12] pollfs: pollable hrtimers davi
2007-04-01 15:58 ` [patch 07/12] pollfs: asynchronous futex wait davi
2007-04-01 15:58 ` [patch 08/12] pollfs: pollable futex davi
2007-04-01 15:58 ` [patch 09/12] pollfs: check if a AIO event ring is empty davi
2007-04-01 15:58 ` [patch 10/12] pollfs: pollable aio davi
2007-04-01 15:58 ` [patch 11/12] pollfs: asynchronous workqueue davi
2007-04-01 15:58 ` [patch 12/12] pollfs: pollable fsync davi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.