I posted this a few weeks back, took another look at it and refined it a bit. I'd like some input on the viability of this approach. A new signalfd setup flag is added, SFD_TASK. This is only valid if used with SFD_CLOEXEC. If set, the task setting up the signalfd descriptor is remembered in the signalfd context, and will be the one we use for checking signals in the poll/read handlers in signalfd. This is needed to make signalfd useful with io_uring and aio, of which the former in particular has my interest. I _think_ this is sane. To prevent the case of a task clearing O_CLOEXEC on the signalfd descriptor, forking, and then exiting, we grab a reference to the task when we assign it. If that original task exits, we catch it in signalfd_flush() and ensure waiters are woken up. The waiters also hold a task reference, so we don't have to wait for them to go away. Need to double check we can't race between original task exiting and new task grabbing a reference. I don't think this is solid in the version below. Probably need to add a refcount for ctx->task (the pointer, not the task) for that. Comments? Attaching two test programs using io_uring, one using poll and the other read. Remove SFD_TASK from either of them, and they will fail ala: ./signalfd-read Timed out waiting for cqe and with SFD_TASK set, both will exit silent with a value of 0. You need liburing installed, then compile them with: gcc -Wall -O2 -o signalfd-read signalfd-read.c -luring --- diff --git a/fs/signalfd.c b/fs/signalfd.c index 44b6845b071c..4bbdab9438c1 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -50,28 +50,62 @@ void signalfd_cleanup(struct sighand_struct *sighand) struct signalfd_ctx { sigset_t sigmask; + struct task_struct *task; }; +static int signalfd_flush(struct file *file, void *data) +{ + struct signalfd_ctx *ctx = file->private_data; + struct task_struct *tsk = ctx->task; + + if (tsk == current) { + ctx->task = NULL; + wake_up(&tsk->sighand->signalfd_wqh); + put_task_struct(tsk); + } + + return 0; +} + static int signalfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct signalfd_ctx *ctx = file->private_data; + + if (ctx->task) + put_task_struct(ctx->task); + kfree(ctx); return 0; } +static void signalfd_put_task(struct task_struct *tsk) +{ + put_task_struct(tsk); +} + +static struct task_struct *signalfd_get_task(struct signalfd_ctx *ctx) +{ + struct task_struct *tsk = ctx->task ?: current; + + get_task_struct(tsk); + return tsk; +} + static __poll_t signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; + struct task_struct *tsk = signalfd_get_task(ctx); __poll_t events = 0; - poll_wait(file, ¤t->sighand->signalfd_wqh, wait); + poll_wait(file, &tsk->sighand->signalfd_wqh, wait); - spin_lock_irq(¤t->sighand->siglock); - if (next_signal(¤t->pending, &ctx->sigmask) || - next_signal(¤t->signal->shared_pending, + spin_lock_irq(&tsk->sighand->siglock); + if (next_signal(&tsk->pending, &ctx->sigmask) || + next_signal(&tsk->signal->shared_pending, &ctx->sigmask)) events |= EPOLLIN; - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); + signalfd_put_task(tsk); return events; } @@ -167,10 +201,11 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info int nonblock) { ssize_t ret; + struct task_struct *tsk = signalfd_get_task(ctx); DECLARE_WAITQUEUE(wait, current); - spin_lock_irq(¤t->sighand->siglock); - ret = dequeue_signal(current, &ctx->sigmask, info); + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &ctx->sigmask, info); switch (ret) { case 0: if (!nonblock) @@ -178,29 +213,35 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info ret = -EAGAIN; /* fall through */ default: - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); + signalfd_put_task(tsk); return ret; } - add_wait_queue(¤t->sighand->signalfd_wqh, &wait); + add_wait_queue(&tsk->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - ret = dequeue_signal(current, &ctx->sigmask, info); + ret = dequeue_signal(tsk, &ctx->sigmask, info); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); schedule(); - spin_lock_irq(¤t->sighand->siglock); + spin_lock_irq(&tsk->sighand->siglock); + if (tsk != current && !ctx->task) { + ret = -ESRCH; + break; + } } - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); - remove_wait_queue(¤t->sighand->signalfd_wqh, &wait); + remove_wait_queue(&tsk->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); + signalfd_put_task(tsk); return ret; } @@ -254,6 +295,7 @@ static const struct file_operations signalfd_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = signalfd_show_fdinfo, #endif + .flush = signalfd_flush, .release = signalfd_release, .poll = signalfd_poll, .read = signalfd_read, @@ -267,19 +309,26 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) /* Check the SFD_* constants for consistency. */ BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK); + BUILD_BUG_ON(SFD_TASK & (SFD_CLOEXEC | SFD_NONBLOCK)); - if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK)) + if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_TASK)) + return -EINVAL; + if ((flags & (SFD_CLOEXEC | SFD_TASK)) == SFD_TASK) return -EINVAL; sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); signotset(mask); if (ufd == -1) { - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->sigmask = *mask; + if (flags & SFD_TASK) { + ctx->task = current; + get_task_struct(ctx->task); + } /* * When we call this, the initialization must be complete, since @@ -290,6 +339,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) if (ufd < 0) kfree(ctx); } else { + struct task_struct *tsk; struct fd f = fdget(ufd); if (!f.file) return -EBADF; @@ -298,11 +348,13 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags) fdput(f); return -EINVAL; } - spin_lock_irq(¤t->sighand->siglock); + tsk = signalfd_get_task(ctx); + spin_lock_irq(&tsk->sighand->siglock); ctx->sigmask = *mask; - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); - wake_up(¤t->sighand->signalfd_wqh); + wake_up(&tsk->sighand->signalfd_wqh); + signalfd_put_task(tsk); fdput(f); } diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 83429a05b698..064c5dc3eb99 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -16,6 +16,7 @@ /* Flags for signalfd4. */ #define SFD_CLOEXEC O_CLOEXEC #define SFD_NONBLOCK O_NONBLOCK +#define SFD_TASK 00000001 struct signalfd_siginfo { __u32 ssi_signo; -- Jens Axboe