Make task directories in /proc pollable
diff mbox series

Message ID 1840e47fc4113af16989a4250d98bed62a9bce53.1354559528.git.luto@amacapital.net
State New, archived
Headers show
Series
  • Make task directories in /proc pollable
Related show

Commit Message

Andy Lutomirski Dec. 3, 2012, 6:32 p.m. UTC
This has been a requested feature for a long time [1][2].

/proc/<pid> and /proc/<tgid>/task/<pid> will show POLLIN | POLLRDNORM
when <pid> is dead but not yet reaped, and POLLIN | POLLRDNORM | POLLERR
once <pid> has been reaped.

The ability to tell whether the task exists given an fd isn't new -- readdir
can do it.  The ability to distinguish live and zombie tasks by fd may
have minor security implications.

It's conceivable, although unlikely, that some existing software expects
directories in /proc to always have POLLIN set.  The benefit of using POLLIN
instead of something like POLLPRI is feature detection -- polling /proc/self
(a live task) and finding POLLIN *absent* reliably indicates that this
feature is present, since kernels without it report the default directory
poll mask, which includes POLLIN.

[1] http://lwn.net/Articles/462177/
[2] http://0pointer.de/blog/projects/plumbers-wishlist-3.html

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
---
 fs/proc/base.c            | 26 ++++++++++++++++++++++++++
 include/linux/init_task.h |  2 ++
 include/linux/sched.h     |  3 +++
 kernel/exit.c             |  3 +++
 kernel/fork.c             |  4 ++++
 5 files changed, 38 insertions(+)

Patch
diff mbox series

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3c231ad..ebab7ec 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2737,9 +2737,34 @@  static int proc_tgid_base_readdir(struct file * filp,
 				   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
 }
 
+static unsigned int proc_task_base_poll(struct file *filp,
+					struct poll_table_struct *wait)
+{
+	struct task_struct *task;
+	int retval;
+
+	task = get_proc_task(filp->f_path.dentry->d_inode);
+	if (!task)
+		return POLLIN | POLLRDNORM | POLLERR;
+
+	read_lock(&tasklist_lock);
+	poll_wait(filp, &task->detach_wqh, wait);
+	if (task_is_dead(task)) {
+		retval = POLLIN | POLLRDNORM;
+	} else {
+		retval = 0;
+		poll_wait(filp, &task->exit_wqh, wait);
+	}
+	read_unlock(&tasklist_lock);
+
+	put_task_struct(task);
+	return retval;
+}
+
 static const struct file_operations proc_tgid_base_operations = {
 	.read		= generic_read_dir,
 	.readdir	= proc_tgid_base_readdir,
+	.poll		= proc_task_base_poll,
 	.llseek		= default_llseek,
 };
 
@@ -3110,6 +3135,7 @@  static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
 static const struct file_operations proc_tid_base_operations = {
 	.read		= generic_read_dir,
 	.readdir	= proc_tid_base_readdir,
+	.poll		= proc_task_base_poll,
 	.llseek		= default_llseek,
 };
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6d087c5..093379e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -202,6 +202,8 @@  extern struct task_group root_task_group;
 		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),		\
 	},								\
 	.thread_group	= LIST_HEAD_INIT(tsk.thread_group),		\
+	.exit_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(tsk.exit_wqh),	\
+	.detach_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(tsk.detach_wqh),\
 	INIT_IDS							\
 	INIT_PERF_EVENTS(tsk)						\
 	INIT_TRACE_IRQFLAGS						\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a0..6034a37 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1548,6 +1548,9 @@  struct task_struct {
 #ifdef CONFIG_UPROBES
 	struct uprobe_task *utask;
 #endif
+
+	/* These are woken with tasklist_lock held. */
+	wait_queue_head_t exit_wqh, detach_wqh;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/exit.c b/kernel/exit.c
index 346616c..01c584b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -212,6 +212,7 @@  repeat:
 			leader->exit_state = EXIT_DEAD;
 	}
 
+	wake_up_all(&p->detach_wqh);
 	write_unlock_irq(&tasklist_lock);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
@@ -775,6 +776,8 @@  static void exit_notify(struct task_struct *tsk, int group_dead)
 	/* mt-exec, de_thread() is waiting for group leader */
 	if (unlikely(tsk->signal->notify_count < 0))
 		wake_up_process(tsk->signal->group_exit_task);
+
+	wake_up_all(&tsk->exit_wqh);
 	write_unlock_irq(&tasklist_lock);
 
 	/* If the process is dead, release it - nobody will wait for it */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b20ab7..356b32c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -235,6 +235,8 @@  void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(!tsk->exit_state);
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
+	WARN_ON(waitqueue_active(&tsk->exit_wqh));
+	WARN_ON(waitqueue_active(&tsk->detach_wqh));
 
 	security_task_free(tsk);
 	exit_creds(tsk);
@@ -1285,6 +1287,8 @@  static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.do_batch = 0;
 	p->memcg_batch.memcg = NULL;
 #endif
+	init_waitqueue_head(&p->exit_wqh);
+	init_waitqueue_head(&p->detach_wqh);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p);