* [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid
@ 2022-02-14 21:38 Kees Cook
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
0 siblings, 2 replies; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
Robert Święcki, Jann Horn, Oleg Nesterov,
Thomas Gleixner, Ingo Molnar, linux-kernel, linux-api,
linux-hardening
Hi,
Okay, here's a working version of this. Is adding 48 bytes into task
struct worth it? Can this be improved, and is the non-signal-exit logic
for __WCHILDSIGINFO sane?
Other thoughts?
-Kees
Kees Cook (2):
exit: Introduce __WCHILDSIGINFO for waitid
selftests/seccomp: Check for waitid() behavior
include/linux/sched.h | 1 +
include/uapi/linux/wait.h | 1 +
kernel/exit.c | 23 +++-
kernel/signal.c | 4 +
tools/testing/selftests/seccomp/seccomp_bpf.c | 130 ++++++++++++++++++
5 files changed, 155 insertions(+), 4 deletions(-)
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] exit: Introduce __WCHILDSIGINFO for waitid
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
2022-02-15 9:14 ` Christian Brauner
2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
1 sibling, 1 reply; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
Robert Święcki, Jann Horn, Oleg Nesterov,
Thomas Gleixner, linux-api, Ingo Molnar, linux-kernel,
linux-hardening
When __WCHILDSIGINFO is specified, copy the child's siginfo into the
waitid infop instead of constructing the parent's SIGCHLD perspective.
Cc: Christian Brauner <brauner@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: "Robert Święcki" <robert@swiecki.net>
Cc: Jann Horn <jannh@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
include/linux/sched.h | 1 +
include/uapi/linux/wait.h | 1 +
kernel/exit.c | 23 +++++++++++++++++++----
kernel/signal.c | 4 ++++
4 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f5b2be39a78c..e40789e801ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,6 +1178,7 @@ struct task_struct {
#endif
/* Ptrace state: */
unsigned long ptrace_message;
+ kernel_siginfo_t death_siginfo;
kernel_siginfo_t *last_siginfo;
struct task_io_accounting ioac;
diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
index 85b809fc9f11..7258cd4510ba 100644
--- a/include/uapi/linux/wait.h
+++ b/include/uapi/linux/wait.h
@@ -9,6 +9,7 @@
#define WCONTINUED 0x00000008
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
+#define __WCHILDSIGINFO 0x10000000 /* Report child's siginfo. */
#define __WNOTHREAD 0x20000000 /* Don't wait on children of other threads in this group */
#define __WALL 0x40000000 /* Wait on all children, regardless of type */
#define __WCLONE 0x80000000 /* Wait only on non-SIGCHLD children */
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..de6e024976c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -953,6 +953,7 @@ struct waitid_info {
uid_t uid;
int status;
int cause;
+ kernel_siginfo_t siginfo;
};
struct wait_opts {
@@ -964,7 +965,7 @@ struct wait_opts {
int wo_stat;
struct rusage *wo_rusage;
- wait_queue_entry_t child_wait;
+ wait_queue_entry_t child_wait;
int notask_error;
};
@@ -1012,11 +1013,16 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
int state, status;
pid_t pid = task_pid_vnr(p);
uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
- struct waitid_info *infop;
+ struct waitid_info *infop = wo->wo_info;
if (!likely(wo->wo_flags & WEXITED))
return 0;
+ /* Before WNOWAIT so a copy can be extracted without reaping. */
+ if (unlikely(wo->wo_flags & __WCHILDSIGINFO)) {
+ if (infop && p->last_siginfo)
+ copy_siginfo(&infop->siginfo, p->last_siginfo);
+ }
if (unlikely(wo->wo_flags & WNOWAIT)) {
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
@@ -1121,7 +1127,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
release_task(p);
out_info:
- infop = wo->wo_info;
if (infop) {
if ((status & 0x7f) == 0) {
infop->cause = CLD_EXITED;
@@ -1564,7 +1569,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
unsigned int f_flags = 0;
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
- __WNOTHREAD|__WCLONE|__WALL))
+ __WNOTHREAD|__WCLONE|__WALL|__WCHILDSIGINFO))
return -EINVAL;
if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
return -EINVAL;
@@ -1637,6 +1642,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
+ /* __WCHILDSIGINFO */
+ if (info.siginfo.si_signo)
+ return copy_siginfo_to_user(infop, &info.siginfo);
+
if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
@@ -1780,6 +1789,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
+ /* __WCHILDSIGINFO */
+ if (info.siginfo.si_signo)
+ return copy_siginfo_to_user32(
+ (struct compat_siginfo __user *)infop,
+ &info.siginfo);
+
if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8..41f6ba6b7aa7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
}
fatal:
+ /* Allow siginfo to be queried until reaped. */
+ copy_siginfo(&current->death_siginfo, &ksig->info);
+ current->last_siginfo = &current->death_siginfo;
+
spin_unlock_irq(&sighand->siglock);
if (unlikely(cgroup_task_frozen(current)))
cgroup_leave_frozen(true);
--
2.30.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] selftests/seccomp: Check for waitid() behavior
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
1 sibling, 0 replies; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
Robert Święcki, Jann Horn, Oleg Nesterov,
Thomas Gleixner, Ingo Molnar, linux-kernel, linux-api,
linux-hardening
Verify we can fetch child's siginfo_t from waitid() with __WCHILDSIGINFO.
Skip if it's not available.
Signed-off-by: Kees Cook <keescook@chromium.org>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 130 ++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..0c803c2b450e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -268,6 +268,14 @@ struct seccomp_notif_addfd_big {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+#ifndef __WCHILDSIGINFO
+#define __WCHILDSIGINFO 0x10000000
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -765,6 +773,128 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
close(fd);
}
+FIXTURE(SIGINFO) {
+ pid_t child_pid;
+};
+
+FIXTURE_SETUP(SIGINFO)
+{
+ self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(SIGINFO)
+{
+ if (self->child_pid > 0)
+ waitpid(self->child_pid, NULL, WNOHANG);
+}
+
+TEST_F(SIGINFO, child)
+{
+ int status;
+ siginfo_t info = { };
+ /* Kill only when calling __NR_prctl. */
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS | 0xBA),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ self->child_pid = fork();
+ ASSERT_LE(0, self->child_pid);
+ if (self->child_pid == 0) {
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+ prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ /* Should have died now. */
+ _exit(37);
+ }
+
+ /* Check siginfo_t contents. */
+ EXPECT_EQ(waitid(P_PID, self->child_pid, &info, WEXITED | WNOWAIT), 0);
+#if 0
+ struct {
+ int si_signo;
+ int si_code;
+ int si_errno;
+ union __sifields _sifields;
+ }
+
+ /* SIGCHLD */
+ struct {
+ __kernel_pid_t _pid; /* which child */
+ __kernel_uid32_t _uid; /* sender's uid */
+ int _status; /* exit code */
+ __ARCH_SI_CLOCK_T _utime;
+ __ARCH_SI_CLOCK_T _stime;
+ } _sigchld;
+#endif
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ EXPECT_TRUE(info.si_code == CLD_KILLED || info.si_code == CLD_DUMPED);
+ EXPECT_TRUE(info.si_errno == 0);
+ EXPECT_EQ(info.si_pid, self->child_pid);
+
+ ASSERT_TRUE(WIFSIGNALED(info.si_status));
+ /* TODO: why doesn't this WCOREDUMP() agree with below? */
+ /* EXPECT_TRUE(WCOREDUMP(status)); */
+ EXPECT_EQ(WTERMSIG(info.si_status), SIGSYS);
+
+
+ memset(&info, 0, sizeof(info));
+ status = waitid(P_PID, self->child_pid, &info,
+ WEXITED | WNOWAIT | __WCHILDSIGINFO);
+ EXPECT_EQ(status, 0) {
+ if (status < 0 && errno == EINVAL)
+ SKIP(goto skip_siginfo, "Kernel does not support waitid() with __WCHILDSIGINFO");
+ }
+#if 0
+ /* SIGSYS */
+ struct {
+ void __user *_call_addr;/* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
+
+ info.si_signo = SIGSYS;
+ info.si_code = SYS_SECCOMP;
+ info.si_call_addr = (void __user *)KSTK_EIP(current);
+ info.si_errno = reason;
+ info.si_arch = syscall_get_arch(current);
+ info.si_syscall = syscall;
+
+#endif
+ ASSERT_EQ(info.si_signo, SIGSYS);
+ EXPECT_EQ(info.si_code, SYS_SECCOMP);
+ /* EXPECT_EQ(info.si_arch, ...native arch...); */
+ EXPECT_EQ(info.si_syscall, __NR_prctl);
+ /*
+ * The syscall will have happened somewhere near the libc
+ * prctl implementation.
+ */
+ EXPECT_TRUE(info.si_call_addr >= (void *)prctl &&
+ info.si_call_addr <= (void *)prctl + PAGE_SIZE) {
+ TH_LOG("info.si_call_addr: %p", info.si_call_addr);
+ TH_LOG("prctl : %p", prctl);
+ }
+ EXPECT_EQ(info.si_errno, 0xBA);
+
+skip_siginfo:
+ /* Check status contents. */
+ ASSERT_EQ(waitpid(self->child_pid, &status, 0), self->child_pid);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ /* TODO: why doesn't this WCOREDUMP() agree with above? */
+ /* EXPECT_TRUE(WCOREDUMP(status)); */
+ EXPECT_EQ(WTERMSIG(status), SIGSYS);
+ self->child_pid = 0;
+}
+
/* This is a thread task to die via seccomp filter violation. */
void *kill_thread(void *data)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] exit: Introduce __WCHILDSIGINFO for waitid
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
@ 2022-02-15 9:14 ` Christian Brauner
0 siblings, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2022-02-15 9:14 UTC (permalink / raw)
To: Kees Cook
Cc: Eric W. Biederman, Andy Lutomirski, Robert Święcki,
Jann Horn, Oleg Nesterov, Thomas Gleixner, linux-api,
Ingo Molnar, linux-kernel, linux-hardening
On Mon, Feb 14, 2022 at 01:38:22PM -0800, Kees Cook wrote:
> When __WCHILDSIGINFO is specified, copy the child's siginfo into the
> waitid infop instead of constructing the parent's SIGCHLD perspective.
>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: Andy Lutomirski <luto@amacapital.net>
> Cc: "Robert Święcki" <robert@swiecki.net>
> Cc: Jann Horn <jannh@google.com>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: linux-api@vger.kernel.org
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> include/linux/sched.h | 1 +
> include/uapi/linux/wait.h | 1 +
> kernel/exit.c | 23 +++++++++++++++++++----
> kernel/signal.c | 4 ++++
> 4 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index f5b2be39a78c..e40789e801ef 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1178,6 +1178,7 @@ struct task_struct {
> #endif
> /* Ptrace state: */
> unsigned long ptrace_message;
> + kernel_siginfo_t death_siginfo;
> kernel_siginfo_t *last_siginfo;
>
> struct task_io_accounting ioac;
> diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
> index 85b809fc9f11..7258cd4510ba 100644
> --- a/include/uapi/linux/wait.h
> +++ b/include/uapi/linux/wait.h
> @@ -9,6 +9,7 @@
> #define WCONTINUED 0x00000008
> #define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
>
> +#define __WCHILDSIGINFO 0x10000000 /* Report child's siginfo. */
> #define __WNOTHREAD 0x20000000 /* Don't wait on children of other threads in this group */
> #define __WALL 0x40000000 /* Wait on all children, regardless of type */
> #define __WCLONE 0x80000000 /* Wait only on non-SIGCHLD children */
> diff --git a/kernel/exit.c b/kernel/exit.c
> index b00a25bb4ab9..de6e024976c6 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -953,6 +953,7 @@ struct waitid_info {
> uid_t uid;
> int status;
> int cause;
> + kernel_siginfo_t siginfo;
Heh, I once got yelled at because I put siginfo in there... :)
If this time it's acceptable it might be worth considering replacing
some of the duplicated fields if possible. But again, on a suboptimal
device because of vacation.
> };
>
> struct wait_opts {
> @@ -964,7 +965,7 @@ struct wait_opts {
> int wo_stat;
> struct rusage *wo_rusage;
>
> - wait_queue_entry_t child_wait;
> + wait_queue_entry_t child_wait;
> int notask_error;
> };
>
> @@ -1012,11 +1013,16 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
> int state, status;
> pid_t pid = task_pid_vnr(p);
> uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
> - struct waitid_info *infop;
> + struct waitid_info *infop = wo->wo_info;
>
> if (!likely(wo->wo_flags & WEXITED))
> return 0;
>
> + /* Before WNOWAIT so a copy can be extracted without reaping. */
> + if (unlikely(wo->wo_flags & __WCHILDSIGINFO)) {
> + if (infop && p->last_siginfo)
> + copy_siginfo(&infop->siginfo, p->last_siginfo);
> + }
> if (unlikely(wo->wo_flags & WNOWAIT)) {
> status = (p->signal->flags & SIGNAL_GROUP_EXIT)
> ? p->signal->group_exit_code : p->exit_code;
> @@ -1121,7 +1127,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
> release_task(p);
>
> out_info:
> - infop = wo->wo_info;
> if (infop) {
> if ((status & 0x7f) == 0) {
> infop->cause = CLD_EXITED;
> @@ -1564,7 +1569,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
> unsigned int f_flags = 0;
>
> if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
> - __WNOTHREAD|__WCLONE|__WALL))
> + __WNOTHREAD|__WCLONE|__WALL|__WCHILDSIGINFO))
> return -EINVAL;
> if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
> return -EINVAL;
> @@ -1637,6 +1642,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
> if (!infop)
> return err;
>
> + /* __WCHILDSIGINFO */
> + if (info.siginfo.si_signo)
> + return copy_siginfo_to_user(infop, &info.siginfo);
> +
> if (!user_write_access_begin(infop, sizeof(*infop)))
> return -EFAULT;
>
> @@ -1780,6 +1789,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
> if (!infop)
> return err;
>
> + /* __WCHILDSIGINFO */
> + if (info.siginfo.si_signo)
> + return copy_siginfo_to_user32(
> + (struct compat_siginfo __user *)infop,
> + &info.siginfo);
> +
Just to repeat what I said in the earlier thread: if I'm not misreading
this - I have to do this on a suboptimal device because of vacation -
then there's a behavioral change for what fields are initialized and
what additional fields (if any) are zeroed between a __WCHILDSIGINFO
request and a regular waitid() request. I'm a) not sure we want this and
b) we should document it.
> if (!user_write_access_begin(infop, sizeof(*infop)))
> return -EFAULT;
>
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 9b04631acde8..41f6ba6b7aa7 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
> }
>
> fatal:
> + /* Allow siginfo to be queried until reaped. */
> + copy_siginfo(&current->death_siginfo, &ksig->info);
> + current->last_siginfo = &current->death_siginfo;
> +
> spin_unlock_irq(&sighand->siglock);
> if (unlikely(cgroup_task_frozen(current)))
> cgroup_leave_frozen(true);
> --
> 2.30.2
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-02-15 9:14 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
2022-02-15 9:14 ` Christian Brauner
2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).