* [PATCH 1/2] exit: Introduce __WCHILDSIGINFO for waitid
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
2022-02-15 9:14 ` Christian Brauner
2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
1 sibling, 1 reply; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
Robert Święcki, Jann Horn, Oleg Nesterov,
Thomas Gleixner, linux-api, Ingo Molnar, linux-kernel,
linux-hardening
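When __WCHILDSIGINFO is specified, copy the siginfo of the fatal signal
that killed the child (saved by get_signal() when the child dies) into the
waitid() infop, instead of constructing the usual SIGCHLD siginfo that
describes the child from the parent's perspective.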
Cc: Christian Brauner <brauner@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: "Robert Święcki" <robert@swiecki.net>
Cc: Jann Horn <jannh@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
include/linux/sched.h | 1 +
include/uapi/linux/wait.h | 1 +
kernel/exit.c | 23 +++++++++++++++++++----
kernel/signal.c | 4 ++++
4 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f5b2be39a78c..e40789e801ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,6 +1178,7 @@ struct task_struct {
#endif
/* Ptrace state: */
unsigned long ptrace_message;
+ kernel_siginfo_t death_siginfo;
kernel_siginfo_t *last_siginfo;
struct task_io_accounting ioac;
diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
index 85b809fc9f11..7258cd4510ba 100644
--- a/include/uapi/linux/wait.h
+++ b/include/uapi/linux/wait.h
@@ -9,6 +9,7 @@
#define WCONTINUED 0x00000008
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
+#define __WCHILDSIGINFO 0x10000000 /* Report child's siginfo. */
#define __WNOTHREAD 0x20000000 /* Don't wait on children of other threads in this group */
#define __WALL 0x40000000 /* Wait on all children, regardless of type */
#define __WCLONE 0x80000000 /* Wait only on non-SIGCHLD children */
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..de6e024976c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -953,6 +953,7 @@ struct waitid_info {
uid_t uid;
int status;
int cause;
+ kernel_siginfo_t siginfo;
};
struct wait_opts {
@@ -964,7 +965,7 @@ struct wait_opts {
int wo_stat;
struct rusage *wo_rusage;
- wait_queue_entry_t child_wait;
+ wait_queue_entry_t child_wait;
int notask_error;
};
@@ -1012,11 +1013,16 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
int state, status;
pid_t pid = task_pid_vnr(p);
uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
- struct waitid_info *infop;
+ struct waitid_info *infop = wo->wo_info;
if (!likely(wo->wo_flags & WEXITED))
return 0;
+ /* Before WNOWAIT so a copy can be extracted without reaping. */
+ if (unlikely(wo->wo_flags & __WCHILDSIGINFO)) {
+ if (infop && p->last_siginfo)
+ copy_siginfo(&infop->siginfo, p->last_siginfo);
+ }
if (unlikely(wo->wo_flags & WNOWAIT)) {
status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
@@ -1121,7 +1127,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
release_task(p);
out_info:
- infop = wo->wo_info;
if (infop) {
if ((status & 0x7f) == 0) {
infop->cause = CLD_EXITED;
@@ -1564,7 +1569,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
unsigned int f_flags = 0;
if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
- __WNOTHREAD|__WCLONE|__WALL))
+ __WNOTHREAD|__WCLONE|__WALL|__WCHILDSIGINFO))
return -EINVAL;
if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
return -EINVAL;
@@ -1637,6 +1642,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
if (!infop)
return err;
+ /* __WCHILDSIGINFO */
+ if (info.siginfo.si_signo)
+ return copy_siginfo_to_user(infop, &info.siginfo);
+
if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
@@ -1780,6 +1789,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
if (!infop)
return err;
+ /* __WCHILDSIGINFO */
+ if (info.siginfo.si_signo)
+ return copy_siginfo_to_user32(
+ (struct compat_siginfo __user *)infop,
+ &info.siginfo);
+
if (!user_write_access_begin(infop, sizeof(*infop)))
return -EFAULT;
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8..41f6ba6b7aa7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
}
fatal:
+ /* Allow siginfo to be queried until reaped. */
+ copy_siginfo(&current->death_siginfo, &ksig->info);
+ current->last_siginfo = &current->death_siginfo;
+
spin_unlock_irq(&sighand->siglock);
if (unlikely(cgroup_task_frozen(current)))
cgroup_leave_frozen(true);
--
2.30.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] selftests/seccomp: Check for waitid() behavior
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
1 sibling, 0 replies; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
Robert Święcki, Jann Horn, Oleg Nesterov,
Thomas Gleixner, Ingo Molnar, linux-kernel, linux-api,
linux-hardening
Verify that waitid() with __WCHILDSIGINFO returns the child's own siginfo_t
(here, the SIGSYS siginfo generated by seccomp). Skip the check if the
kernel does not support the flag.
Signed-off-by: Kees Cook <keescook@chromium.org>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 130 ++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..0c803c2b450e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -268,6 +268,14 @@ struct seccomp_notif_addfd_big {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP 1
+#endif
+
+#ifndef __WCHILDSIGINFO
+#define __WCHILDSIGINFO 0x10000000
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -765,6 +773,128 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
close(fd);
}
+FIXTURE(SIGINFO) {
+ pid_t child_pid;
+};
+
+FIXTURE_SETUP(SIGINFO)
+{
+ self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(SIGINFO)
+{
+ if (self->child_pid > 0)
+ waitpid(self->child_pid, NULL, WNOHANG);
+}
+
+TEST_F(SIGINFO, child)
+{
+ int status;
+ siginfo_t info = { };
+ /* Kill only when calling __NR_prctl. */
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS | 0xBA),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ self->child_pid = fork();
+ ASSERT_LE(0, self->child_pid);
+ if (self->child_pid == 0) {
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+ prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ /* Should have died now. */
+ _exit(37);
+ }
+
+ /* Check siginfo_t contents. */
+ EXPECT_EQ(waitid(P_PID, self->child_pid, &info, WEXITED | WNOWAIT), 0);
+#if 0
+ struct {
+ int si_signo;
+ int si_code;
+ int si_errno;
+ union __sifields _sifields;
+ }
+
+ /* SIGCHLD */
+ struct {
+ __kernel_pid_t _pid; /* which child */
+ __kernel_uid32_t _uid; /* sender's uid */
+ int _status; /* exit code */
+ __ARCH_SI_CLOCK_T _utime;
+ __ARCH_SI_CLOCK_T _stime;
+ } _sigchld;
+#endif
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ EXPECT_TRUE(info.si_code == CLD_KILLED || info.si_code == CLD_DUMPED);
+ EXPECT_TRUE(info.si_errno == 0);
+ EXPECT_EQ(info.si_pid, self->child_pid);
+
+ ASSERT_TRUE(WIFSIGNALED(info.si_status));
+ /* TODO: why doesn't this WCOREDUMP() agree with below? */
+ /* EXPECT_TRUE(WCOREDUMP(status)); */
+ EXPECT_EQ(WTERMSIG(info.si_status), SIGSYS);
+
+
+ memset(&info, 0, sizeof(info));
+ status = waitid(P_PID, self->child_pid, &info,
+ WEXITED | WNOWAIT | __WCHILDSIGINFO);
+ EXPECT_EQ(status, 0) {
+ if (status < 0 && errno == EINVAL)
+ SKIP(goto skip_siginfo, "Kernel does not support waitid() with __WCHILDSIGINFO");
+ }
+#if 0
+ /* SIGSYS */
+ struct {
+ void __user *_call_addr;/* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } _sigsys;
+
+ info.si_signo = SIGSYS;
+ info.si_code = SYS_SECCOMP;
+ info.si_call_addr = (void __user *)KSTK_EIP(current);
+ info.si_errno = reason;
+ info.si_arch = syscall_get_arch(current);
+ info.si_syscall = syscall;
+
+#endif
+ ASSERT_EQ(info.si_signo, SIGSYS);
+ EXPECT_EQ(info.si_code, SYS_SECCOMP);
+ /* EXPECT_EQ(info.si_arch, ...native arch...); */
+ EXPECT_EQ(info.si_syscall, __NR_prctl);
+ /*
+ * The syscall will have happened somewhere near the libc
+ * prctl implementation.
+ */
+ EXPECT_TRUE(info.si_call_addr >= (void *)prctl &&
+ info.si_call_addr <= (void *)prctl + PAGE_SIZE) {
+ TH_LOG("info.si_call_addr: %p", info.si_call_addr);
+ TH_LOG("prctl : %p", prctl);
+ }
+ EXPECT_EQ(info.si_errno, 0xBA);
+
+skip_siginfo:
+ /* Check status contents. */
+ ASSERT_EQ(waitpid(self->child_pid, &status, 0), self->child_pid);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ /* TODO: why doesn't this WCOREDUMP() agree with above? */
+ /* EXPECT_TRUE(WCOREDUMP(status)); */
+ EXPECT_EQ(WTERMSIG(status), SIGSYS);
+ self->child_pid = 0;
+}
+
/* This is a thread task to die via seccomp filter violation. */
void *kill_thread(void *data)
{
--
2.30.2
^ permalink raw reply related [flat|nested] 4+ messages in thread