All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: io-uring <io-uring@vger.kernel.org>
Cc: Jann Horn <jannh@google.com>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: [PATCH RFC] signalfd: add support for SFD_TASK
Date: Tue, 26 Nov 2019 22:11:38 -0700	[thread overview]
Message-ID: <254505c9-2b76-ebeb-306c-02aaf1704b88@kernel.dk> (raw)

[-- Attachment #1: Type: text/plain, Size: 7356 bytes --]

I posted this a few weeks back, took another look at it and refined it a
bit. I'd like some input on the viability of this approach.

A new signalfd setup flag is added, SFD_TASK. This is only valid if used
with SFD_CLOEXEC. If set, the task setting up the signalfd descriptor is
remembered in the signalfd context, and will be the one we use for
checking signals in the poll/read handlers in signalfd.

This is needed to make signalfd useful with io_uring and aio, of which
the former in particular has my interest.

I _think_ this is sane. To prevent the case of a task clearing O_CLOEXEC
on the signalfd descriptor, forking, and then exiting, we grab a
reference to the task when we assign it. If that original task exits, we
catch it in signalfd_flush() and ensure waiters are woken up. The
waiters also hold a task reference, so we don't have to wait for them to
go away.

Need to double check we can't race between original task exiting and new
task grabbing a reference. I don't think this is solid in the version
below. Probably need to add a refcount for ctx->task (the pointer, not
the task) for that.

Comments? Attaching two test programs using io_uring, one using poll and
the other read. Remove SFD_TASK from either of them, and they will fail
ala:

./signalfd-read
Timed out waiting for cqe

and with SFD_TASK set, both will exit silent with a value of 0. You need
liburing installed, then compile them with:

gcc -Wall -O2 -o signalfd-read signalfd-read.c -luring

---

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 44b6845b071c..4bbdab9438c1 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -50,28 +50,62 @@ void signalfd_cleanup(struct sighand_struct *sighand)
  
  struct signalfd_ctx {
  	sigset_t sigmask;
+	struct task_struct *task;
  };
  
+static int signalfd_flush(struct file *file, void *data)
+{
+	struct signalfd_ctx *ctx = file->private_data;
+	struct task_struct *tsk = ctx->task;
+
+	if (tsk == current) {
+		ctx->task = NULL;
+		wake_up(&tsk->sighand->signalfd_wqh);
+		put_task_struct(tsk);
+	}
+
+	return 0;
+}
+
  static int signalfd_release(struct inode *inode, struct file *file)
  {
-	kfree(file->private_data);
+	struct signalfd_ctx *ctx = file->private_data;
+
+	if (ctx->task)
+		put_task_struct(ctx->task);
+	kfree(ctx);
  	return 0;
  }
  
+static void signalfd_put_task(struct task_struct *tsk)
+{
+	put_task_struct(tsk);
+}
+
+static struct task_struct *signalfd_get_task(struct signalfd_ctx *ctx)
+{
+	struct task_struct *tsk = ctx->task ?: current;
+
+	get_task_struct(tsk);
+	return tsk;
+}
+
  static __poll_t signalfd_poll(struct file *file, poll_table *wait)
  {
  	struct signalfd_ctx *ctx = file->private_data;
+	struct task_struct *tsk = signalfd_get_task(ctx);
  	__poll_t events = 0;
  
-	poll_wait(file, &current->sighand->signalfd_wqh, wait);
+	poll_wait(file, &tsk->sighand->signalfd_wqh, wait);
  
-	spin_lock_irq(&current->sighand->siglock);
-	if (next_signal(&current->pending, &ctx->sigmask) ||
-	    next_signal(&current->signal->shared_pending,
+	spin_lock_irq(&tsk->sighand->siglock);
+	if (next_signal(&tsk->pending, &ctx->sigmask) ||
+	    next_signal(&tsk->signal->shared_pending,
  			&ctx->sigmask))
  		events |= EPOLLIN;
-	spin_unlock_irq(&current->sighand->siglock);
+	spin_unlock_irq(&tsk->sighand->siglock);
  
+	signalfd_put_task(tsk);
  	return events;
  }
  
@@ -167,10 +201,11 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
  				int nonblock)
  {
  	ssize_t ret;
+	struct task_struct *tsk = signalfd_get_task(ctx);
  	DECLARE_WAITQUEUE(wait, current);
  
-	spin_lock_irq(&current->sighand->siglock);
-	ret = dequeue_signal(current, &ctx->sigmask, info);
+	spin_lock_irq(&tsk->sighand->siglock);
+	ret = dequeue_signal(tsk, &ctx->sigmask, info);
  	switch (ret) {
  	case 0:
  		if (!nonblock)
@@ -178,29 +213,35 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
  		ret = -EAGAIN;
  		/* fall through */
  	default:
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
+		signalfd_put_task(tsk);
  		return ret;
  	}
  
-	add_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	add_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
  	for (;;) {
  		set_current_state(TASK_INTERRUPTIBLE);
-		ret = dequeue_signal(current, &ctx->sigmask, info);
+		ret = dequeue_signal(tsk, &ctx->sigmask, info);
  		if (ret != 0)
  			break;
  		if (signal_pending(current)) {
  			ret = -ERESTARTSYS;
  			break;
  		}
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
  		schedule();
-		spin_lock_irq(&current->sighand->siglock);
+		spin_lock_irq(&tsk->sighand->siglock);
+		if (tsk != current && !ctx->task) {
+			ret = -ESRCH;
+			break;
+		}
  	}
-	spin_unlock_irq(&current->sighand->siglock);
+	spin_unlock_irq(&tsk->sighand->siglock);
  
-	remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	remove_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
  	__set_current_state(TASK_RUNNING);
  
+	signalfd_put_task(tsk);
  	return ret;
  }
  
@@ -254,6 +295,7 @@ static const struct file_operations signalfd_fops = {
  #ifdef CONFIG_PROC_FS
  	.show_fdinfo	= signalfd_show_fdinfo,
  #endif
+	.flush		= signalfd_flush,
  	.release	= signalfd_release,
  	.poll		= signalfd_poll,
  	.read		= signalfd_read,
@@ -267,19 +309,26 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  	/* Check the SFD_* constants for consistency.  */
  	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
  	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+	BUILD_BUG_ON(SFD_TASK & (SFD_CLOEXEC | SFD_NONBLOCK));
  
-	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_TASK))
+		return -EINVAL;
+	if ((flags & (SFD_CLOEXEC | SFD_TASK)) == SFD_TASK)
  		return -EINVAL;
  
  	sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
  	signotset(mask);
  
  	if (ufd == -1) {
-		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
  		if (!ctx)
  			return -ENOMEM;
  
  		ctx->sigmask = *mask;
+		if (flags & SFD_TASK) {
+			ctx->task = current;
+			get_task_struct(ctx->task);
+		}
  
  		/*
  		 * When we call this, the initialization must be complete, since
@@ -290,6 +339,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  		if (ufd < 0)
  			kfree(ctx);
  	} else {
+		struct task_struct *tsk;
  		struct fd f = fdget(ufd);
  		if (!f.file)
  			return -EBADF;
@@ -298,11 +348,13 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  			fdput(f);
  			return -EINVAL;
  		}
-		spin_lock_irq(&current->sighand->siglock);
+		tsk = signalfd_get_task(ctx);
+		spin_lock_irq(&tsk->sighand->siglock);
  		ctx->sigmask = *mask;
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
  
-		wake_up(&current->sighand->signalfd_wqh);
+		wake_up(&tsk->sighand->signalfd_wqh);
+		signalfd_put_task(tsk);
  		fdput(f);
  	}
  
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 83429a05b698..064c5dc3eb99 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -16,6 +16,7 @@
  /* Flags for signalfd4.  */
  #define SFD_CLOEXEC O_CLOEXEC
  #define SFD_NONBLOCK O_NONBLOCK
+#define SFD_TASK 00000001
  
  struct signalfd_siginfo {
  	__u32 ssi_signo;

-- 
Jens Axboe


[-- Attachment #2: signalfd-poll.c --]
[-- Type: text/x-csrc, Size: 1411 bytes --]

#include <unistd.h>
#include <sys/signalfd.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <liburing.h>

#define SFD_TASK	00000001

int main(int argc, char *argv[])
{
	struct __kernel_timespec ts;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;
	struct itimerval itv;
	sigset_t mask;
	int sfd, ret;

	sigemptyset(&mask);
	sigaddset(&mask, SIGALRM);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_TASK);
	if (sfd < 0) {
		if (errno == EINVAL) {
			printf("Not supported\n");
			return 0;
		}
		perror("signalfd");
		return 1;
	}

	memset(&itv, 0, sizeof(itv));
	itv.it_value.tv_sec = 0;
	itv.it_value.tv_usec = 100000;
	setitimer(ITIMER_REAL, &itv, NULL);

	io_uring_queue_init(32, &ring, 0);
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, sfd, POLLIN);
	io_uring_submit(&ring);

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
	if (ret < 0) {
		fprintf(stderr, "Timed out waiting for cqe\n");
		ret = 1;
	} else {
		if (cqe->res < 0) {
			fprintf(stderr, "cqe failed with %d\n", cqe->res);
			ret = 1;
		} else if (!(cqe->res & POLLIN)) {
			fprintf(stderr, "POLLIN not set in result mask?\n");
			ret = 1;
		} else {
			ret = 0;
		}
	}
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	close(sfd);
	return ret;
}

[-- Attachment #3: signalfd-read.c --]
[-- Type: text/x-csrc, Size: 1516 bytes --]

#include <unistd.h>
#include <sys/signalfd.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include <liburing.h>

#define SFD_TASK	00000001

int main(int argc, char *argv[])
{
	struct __kernel_timespec ts;
	struct signalfd_siginfo si;
	struct iovec iov = {
		.iov_base = &si,
		.iov_len = sizeof(si),
	};
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;
	struct itimerval itv;
	sigset_t mask;
	int sfd, ret;

	sigemptyset(&mask);
	sigaddset(&mask, SIGALRM);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	sfd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_TASK);
	if (sfd < 0) {
		if (errno == EINVAL) {
			printf("Not supported\n");
			return 0;
		}
		perror("signalfd");
		return 1;
	}

	memset(&itv, 0, sizeof(itv));
	itv.it_value.tv_sec = 0;
	itv.it_value.tv_usec = 100000;
	setitimer(ITIMER_REAL, &itv, NULL);

	io_uring_queue_init(32, &ring, 0);
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_readv(sqe, sfd, &iov, 1, 0);
	io_uring_submit(&ring);

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	ret = io_uring_wait_cqe_timeout(&ring, &cqe, &ts);
	if (ret < 0) {
		fprintf(stderr, "Timed out waiting for cqe\n");
		ret = 1;
	} else {
		ret = 0;
		if (cqe->res < 0) {
			fprintf(stderr, "cqe failed with %d\n", cqe->res);
			ret = 1;
		} else if (cqe->res != sizeof(si)) {
			fprintf(stderr, "Read %d, wanted %d\n", cqe->res, (int)sizeof(si));
			ret = 1;
		}
	}
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	close(sfd);
	return ret;
}

             reply	other threads:[~2019-11-27  5:11 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27  5:11 Jens Axboe [this message]
2019-11-27 19:23 ` [PATCH RFC] signalfd: add support for SFD_TASK Jann Horn
2019-11-27 20:48   ` Jens Axboe
2019-11-27 23:27     ` Jann Horn
2019-11-28  0:41       ` Jens Axboe
2019-11-28  9:02       ` Rasmus Villemoes
2019-11-28 10:07         ` Jann Horn
2019-11-28 19:18           ` Jann Horn
2019-11-28 22:46             ` Jann Horn
2019-11-29 22:30             ` Jann Horn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=254505c9-2b76-ebeb-306c-02aaf1704b88@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=jannh@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.