linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christian Brauner <christian@brauner.io>
To: linux-kernel@vger.kernel.org, oleg@redhat.com
Cc: arnd@arndb.de, ebiederm@xmission.com, keescook@chromium.org,
	joel@joelfernandes.org, tglx@linutronix.de, tj@kernel.org,
	dhowells@redhat.com, jannh@google.com, luto@kernel.org,
	akpm@linux-foundation.org, cyphar@cyphar.com,
	torvalds@linux-foundation.org, viro@zeniv.linux.org.uk,
	kernel-team@android.com, Christian Brauner <christian@brauner.io>
Subject: [PATCH v1 1/2] pidfd: add P_PIDFD to waitid()
Date: Fri, 26 Jul 2019 11:39:33 +0200	[thread overview]
Message-ID: <20190726093934.13557-2-christian@brauner.io> (raw)
In-Reply-To: <20190726093934.13557-1-christian@brauner.io>

This adds the P_PIDFD type to waitid().
One of the last remaining bits for the pidfd api is to make it possible
to wait on pidfds. With P_PIDFD added to waitid() the parts of userspace
that want to use the pidfd api to exclusively manage processes can do so
now.

One of the things this will unblock in the future is the ability to make
it possible to retrieve the exit status via waitid(P_PIDFD) for
non-parent processes if handed a _suitable_ pidfd that has this feature
set. This is similar to what you can do on FreeBSD with kqueue(). It
might even end up being possible to wait on a process as a non-parent if
an appropriate property is enabled on the pidfd.

With P_PIDFD no scoping of the process identified by the pidfd is
possible, i.e. it explicitly blocks things such as wait4(-1), wait4(0),
waitid(P_ALL), waitid(P_PGID) etc. It only allows for semantics
equivalent to wait4(pid), waitid(P_PID). Users that need scoping should
rely on pid-based wait*() syscalls for now.

Signed-off-by: Christian Brauner <christian@brauner.io>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
v1:
- Linus Torvalds <torvalds@linux-foundation.org>:
  - use flag as discussed before, not a dedicated pidfd_wait() syscall
- Oleg Nesterov <oleg@redhat.com>:
  - use flag as discussed before, not a dedicated pidfd_wait() syscall
---
 include/linux/pid.h       |  4 ++++
 include/uapi/linux/wait.h |  1 +
 kernel/exit.c             | 25 +++++++++++++++++++++++--
 kernel/fork.c             |  8 ++++++++
 kernel/signal.c           |  7 +++++--
 5 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 2a83e434db9d..9645b1194c98 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -72,6 +72,10 @@ extern struct pid init_struct_pid;
 
 extern const struct file_operations pidfd_fops;
 
+struct file;
+
+extern struct pid *pidfd_pid(const struct file *file);
+
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
index ac49a220cf2a..85b809fc9f11 100644
--- a/include/uapi/linux/wait.h
+++ b/include/uapi/linux/wait.h
@@ -17,6 +17,7 @@
 #define P_ALL		0
 #define P_PID		1
 #define P_PGID		2
+#define P_PIDFD		3
 
 
 #endif /* _UAPI_LINUX_WAIT_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index a75b6a7f458a..a9b49260b965 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1555,6 +1555,7 @@ static long do_wait(struct wait_opts *wo)
 static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 			  int options, struct rusage *ru)
 {
+	struct fd f;
 	struct wait_opts wo;
 	struct pid *pid = NULL;
 	enum pid_type type;
@@ -1580,11 +1581,27 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 		if (upid <= 0)
 			return -EINVAL;
 		break;
+	case P_PIDFD:
+		type = PIDTYPE_PID;
+		if (upid < 0)
+			return -EINVAL;
+
+		f = fdget(upid);
+		if (!f.file)
+			return -EBADF;
+
+		pid = pidfd_pid(f.file);
+		if (IS_ERR(pid)) {
+			fdput(f);
+			return PTR_ERR(pid);
+		}
+
+		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (type < PIDTYPE_MAX)
+	if (type < PIDTYPE_MAX && !pid)
 		pid = find_get_pid(upid);
 
 	wo.wo_type	= type;
@@ -1594,7 +1611,11 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 	wo.wo_rusage	= ru;
 	ret = do_wait(&wo);
 
-	put_pid(pid);
+	if (which == P_PIDFD)
+		fdput(f);
+	else
+		put_pid(pid);
+
 	return ret;
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..b169e2ca2d84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1690,6 +1690,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 #endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
+struct pid *pidfd_pid(const struct file *file)
+{
+	if (file->f_op == &pidfd_fops)
+		return file->private_data;
+
+	return ERR_PTR(-EBADF);
+}
+
 static int pidfd_release(struct inode *inode, struct file *file)
 {
 	struct pid *pid = file->private_data;
diff --git a/kernel/signal.c b/kernel/signal.c
index 91b789dd6e72..2e567f64812f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3672,8 +3672,11 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
 
 static struct pid *pidfd_to_pid(const struct file *file)
 {
-	if (file->f_op == &pidfd_fops)
-		return file->private_data;
+	struct pid *pid;
+
+	pid = pidfd_pid(file);
+	if (!IS_ERR(pid))
+		return pid;
 
 	return tgid_pidfd_to_pid(file);
 }
-- 
2.22.0


  reply	other threads:[~2019-07-26  9:41 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-26  9:39 [PATCH v1 0/2] pidfd: waiting on processes through pidfds Christian Brauner
2019-07-26  9:39 ` Christian Brauner [this message]
2019-07-26 21:26   ` [PATCH v1 1/2] pidfd: add P_PIDFD to waitid() Linus Torvalds
2019-07-26 21:30     ` Christian Brauner
2019-07-26  9:39 ` [PATCH v1 2/2] pidfd: add pidfd_wait tests Christian Brauner
2019-07-26 10:02 ` [OFFLIST] [PATCH v1 0/2] pidfd: waiting on processes through pidfds Oleg Nesterov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190726093934.13557-2-christian@brauner.io \
    --to=christian@brauner.io \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=cyphar@cyphar.com \
    --cc=dhowells@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=jannh@google.com \
    --cc=joel@joelfernandes.org \
    --cc=keescook@chromium.org \
    --cc=kernel-team@android.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=oleg@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).