All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Jann Horn <jannh@google.com>
Cc: io-uring <io-uring@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	linux-fsdevel <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH RFC] signalfd: add support for SFD_TASK
Date: Wed, 27 Nov 2019 12:48:29 -0800	[thread overview]
Message-ID: <1d3a458a-fa79-5e33-b5ce-b473122f6d1a@kernel.dk> (raw)
In-Reply-To: <CAG48ez33ewwQB26cag+HhjbgGfQCdOLt6CvfmV1A5daCJoXiZQ@mail.gmail.com>

On 11/27/19 12:23 PM, Jann Horn wrote:
> On Wed, Nov 27, 2019 at 6:11 AM Jens Axboe <axboe@kernel.dk> wrote:
>> I posted this a few weeks back, took another look at it and refined it a
>> bit. I'd like some input on the viability of this approach.
>>
>> A new signalfd setup flag is added, SFD_TASK. This is only valid if used
>> with SFD_CLOEXEC. If set, the task setting up the signalfd descriptor is
>> remembered in the signalfd context, and will be the one we use for
>> checking signals in the poll/read handlers in signalfd.
>>
>> This is needed to make signalfd useful with io_uring and aio, of which
>> the former in particular has my interest.
>>
>> I _think_ this is sane. To prevent the case of a task clearing O_CLOEXEC
>> on the signalfd descriptor, forking, and then exiting, we grab a
>> reference to the task when we assign it. If that original task exits, we
>> catch it in signalfd_flush() and ensure waiters are woken up.
> 
> Mh... that's not really reliable, because you only get ->flush() from
> the last exiting thread (or more precisely, the last exiting task that
> shares the files_struct).
> 
> What is your goal here? To have a reference to a task without keeping
> the entire task_struct around in memory if someone leaks the signalfd
> to another process - basically like a weak pointer? If so, you could
> store a refcounted reference to "struct pid" instead of a refcounted
> reference to the task_struct, and then do the lookup of the
> task_struct on ->poll and ->read (similar to what procfs does).

Yeah, I think that works out much better (and cleaner). How about this,
then? Follows your advice and turns it into a struct pid instead. I
don't particularly like the -ESRCH in dequeue and setup, what do you
think? For poll, POLLERR seems like a prudent choice.

Tested with the test cases I sent out yesterday, works for me.

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 44b6845b071c..ccb1173b20aa 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -50,6 +50,7 @@ void signalfd_cleanup(struct sighand_struct *sighand)
  
  struct signalfd_ctx {
  	sigset_t sigmask;
+	struct pid *task_pid;
  };
  
  static int signalfd_release(struct inode *inode, struct file *file)
@@ -58,20 +59,41 @@ static int signalfd_release(struct inode *inode, struct file *file)
  	return 0;
  }
  
+static void signalfd_put_task(struct signalfd_ctx *ctx, struct task_struct *tsk)
+{
+	if (ctx->task_pid)
+		put_task_struct(tsk);
+}
+
+static struct task_struct *signalfd_get_task(struct signalfd_ctx *ctx)
+{
+	if (ctx->task_pid)
+		return get_pid_task(ctx->task_pid, PIDTYPE_PID);
+
+	return current;
+}
+
  static __poll_t signalfd_poll(struct file *file, poll_table *wait)
  {
  	struct signalfd_ctx *ctx = file->private_data;
+	struct task_struct *tsk;
  	__poll_t events = 0;
  
-	poll_wait(file, &current->sighand->signalfd_wqh, wait);
+	tsk = signalfd_get_task(ctx);
+	if (tsk) {
+		poll_wait(file, &tsk->sighand->signalfd_wqh, wait);
  
-	spin_lock_irq(&current->sighand->siglock);
-	if (next_signal(&current->pending, &ctx->sigmask) ||
-	    next_signal(&current->signal->shared_pending,
-			&ctx->sigmask))
-		events |= EPOLLIN;
-	spin_unlock_irq(&current->sighand->siglock);
+		spin_lock_irq(&tsk->sighand->siglock);
+		if (next_signal(&tsk->pending, &ctx->sigmask) ||
+		    next_signal(&tsk->signal->shared_pending,
+				&ctx->sigmask))
+			events |= EPOLLIN;
+		spin_unlock_irq(&tsk->sighand->siglock);
  
+		signalfd_put_task(ctx, tsk);
+	} else {
+		events |= EPOLLERR;
+	}
  	return events;
  }
  
@@ -167,10 +189,15 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
  				int nonblock)
  {
  	ssize_t ret;
+	struct task_struct *tsk;
  	DECLARE_WAITQUEUE(wait, current);
  
-	spin_lock_irq(&current->sighand->siglock);
-	ret = dequeue_signal(current, &ctx->sigmask, info);
+	tsk = signalfd_get_task(ctx);
+	if (!tsk)
+		return -ESRCH;
+
+	spin_lock_irq(&tsk->sighand->siglock);
+	ret = dequeue_signal(tsk, &ctx->sigmask, info);
  	switch (ret) {
  	case 0:
  		if (!nonblock)
@@ -178,29 +205,31 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
  		ret = -EAGAIN;
  		/* fall through */
  	default:
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
+		signalfd_put_task(ctx, tsk);
  		return ret;
  	}
  
-	add_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	add_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
  	for (;;) {
  		set_current_state(TASK_INTERRUPTIBLE);
-		ret = dequeue_signal(current, &ctx->sigmask, info);
+		ret = dequeue_signal(tsk, &ctx->sigmask, info);
  		if (ret != 0)
  			break;
  		if (signal_pending(current)) {
  			ret = -ERESTARTSYS;
  			break;
  		}
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
  		schedule();
-		spin_lock_irq(&current->sighand->siglock);
+		spin_lock_irq(&tsk->sighand->siglock);
  	}
-	spin_unlock_irq(&current->sighand->siglock);
+	spin_unlock_irq(&tsk->sighand->siglock);
  
-	remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
+	remove_wait_queue(&tsk->sighand->signalfd_wqh, &wait);
  	__set_current_state(TASK_RUNNING);
  
+	signalfd_put_task(ctx, tsk);
  	return ret;
  }
  
@@ -267,19 +296,24 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  	/* Check the SFD_* constants for consistency.  */
  	BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
  	BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
+	BUILD_BUG_ON(SFD_TASK & (SFD_CLOEXEC | SFD_NONBLOCK));
  
-	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
+	if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK | SFD_TASK))
+		return -EINVAL;
+	if ((flags & (SFD_CLOEXEC | SFD_TASK)) == SFD_TASK)
  		return -EINVAL;
  
  	sigdelsetmask(mask, sigmask(SIGKILL) | sigmask(SIGSTOP));
  	signotset(mask);
  
  	if (ufd == -1) {
-		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
  		if (!ctx)
  			return -ENOMEM;
  
  		ctx->sigmask = *mask;
+		if (flags & SFD_TASK)
+			ctx->task_pid = get_task_pid(current, PIDTYPE_PID);
  
  		/*
  		 * When we call this, the initialization must be complete, since
@@ -290,6 +324,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  		if (ufd < 0)
  			kfree(ctx);
  	} else {
+		struct task_struct *tsk;
  		struct fd f = fdget(ufd);
  		if (!f.file)
  			return -EBADF;
@@ -298,11 +333,17 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
  			fdput(f);
  			return -EINVAL;
  		}
-		spin_lock_irq(&current->sighand->siglock);
+		tsk = signalfd_get_task(ctx);
+		if (!tsk) {
+			fdput(f);
+			return -ESRCH;
+		}
+		spin_lock_irq(&tsk->sighand->siglock);
  		ctx->sigmask = *mask;
-		spin_unlock_irq(&current->sighand->siglock);
+		spin_unlock_irq(&tsk->sighand->siglock);
  
-		wake_up(&current->sighand->signalfd_wqh);
+		wake_up(&tsk->sighand->signalfd_wqh);
+		signalfd_put_task(ctx, tsk);
  		fdput(f);
  	}
  
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 83429a05b698..064c5dc3eb99 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -16,6 +16,7 @@
  /* Flags for signalfd4.  */
  #define SFD_CLOEXEC O_CLOEXEC
  #define SFD_NONBLOCK O_NONBLOCK
+#define SFD_TASK 00000001
  
  struct signalfd_siginfo {
  	__u32 ssi_signo;

-- 
Jens Axboe


  reply	other threads:[~2019-11-27 20:48 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27  5:11 [PATCH RFC] signalfd: add support for SFD_TASK Jens Axboe
2019-11-27 19:23 ` Jann Horn
2019-11-27 20:48   ` Jens Axboe [this message]
2019-11-27 23:27     ` Jann Horn
2019-11-28  0:41       ` Jens Axboe
2019-11-28  9:02       ` Rasmus Villemoes
2019-11-28 10:07         ` Jann Horn
2019-11-28 19:18           ` Jann Horn
2019-11-28 22:46             ` Jann Horn
2019-11-29 22:30             ` Jann Horn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1d3a458a-fa79-5e33-b5ce-b473122f6d1a@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=jannh@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.