linux-hardening.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid
@ 2022-02-14 21:38 Kees Cook
  2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
  2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
  0 siblings, 2 replies; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
	Robert Święcki, Jann Horn, Oleg Nesterov,
	Thomas Gleixner, Ingo Molnar, linux-kernel, linux-api,
	linux-hardening

Hi,

Okay, here's a working version of this. Is adding 48 bytes into task
struct worth it? Can this be improved, and is the non-signal-exit logic
for __WCHILDSIGINFO sane?

Other thoughts?

-Kees

Kees Cook (2):
  exit: Introduce __WCHILDSIGINFO for waitid
  selftests/seccomp: Check for waitid() behavior

 include/linux/sched.h                         |   1 +
 include/uapi/linux/wait.h                     |   1 +
 kernel/exit.c                                 |  23 +++-
 kernel/signal.c                               |   4 +
 tools/testing/selftests/seccomp/seccomp_bpf.c | 130 ++++++++++++++++++
 5 files changed, 155 insertions(+), 4 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] exit: Introduce __WCHILDSIGINFO for waitid
  2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
  2022-02-15  9:14   ` Christian Brauner
  2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook
  1 sibling, 1 reply; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
	Robert Święcki, Jann Horn, Oleg Nesterov,
	Thomas Gleixner, linux-api, Ingo Molnar, linux-kernel,
	linux-hardening

When __WCHILDSIGINFO is specified, copy the child's siginfo into the
waitid infop instead of constructing the parent's SIGCHLD perspective.

Cc: Christian Brauner <brauner@kernel.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: "Robert Święcki" <robert@swiecki.net>
Cc: Jann Horn <jannh@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/sched.h     |  1 +
 include/uapi/linux/wait.h |  1 +
 kernel/exit.c             | 23 +++++++++++++++++++----
 kernel/signal.c           |  4 ++++
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f5b2be39a78c..e40789e801ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1178,6 +1178,7 @@ struct task_struct {
 #endif
 	/* Ptrace state: */
 	unsigned long			ptrace_message;
+	kernel_siginfo_t		death_siginfo;
 	kernel_siginfo_t		*last_siginfo;
 
 	struct task_io_accounting	ioac;
diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
index 85b809fc9f11..7258cd4510ba 100644
--- a/include/uapi/linux/wait.h
+++ b/include/uapi/linux/wait.h
@@ -9,6 +9,7 @@
 #define WCONTINUED	0x00000008
 #define WNOWAIT		0x01000000	/* Don't reap, just poll status.  */
 
+#define __WCHILDSIGINFO	0x10000000	/* Report child's siginfo. */
 #define __WNOTHREAD	0x20000000	/* Don't wait on children of other threads in this group */
 #define __WALL		0x40000000	/* Wait on all children, regardless of type */
 #define __WCLONE	0x80000000	/* Wait only on non-SIGCHLD children */
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..de6e024976c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -953,6 +953,7 @@ struct waitid_info {
 	uid_t uid;
 	int status;
 	int cause;
+	kernel_siginfo_t siginfo;
 };
 
 struct wait_opts {
@@ -964,7 +965,7 @@ struct wait_opts {
 	int			wo_stat;
 	struct rusage		*wo_rusage;
 
-	wait_queue_entry_t		child_wait;
+	wait_queue_entry_t	child_wait;
 	int			notask_error;
 };
 
@@ -1012,11 +1013,16 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	int state, status;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
-	struct waitid_info *infop;
+	struct waitid_info *infop = wo->wo_info;
 
 	if (!likely(wo->wo_flags & WEXITED))
 		return 0;
 
+	/* Before WNOWAIT so a copy can be extracted without reaping. */
+	if (unlikely(wo->wo_flags & __WCHILDSIGINFO)) {
+		if (infop && p->last_siginfo)
+			copy_siginfo(&infop->siginfo, p->last_siginfo);
+	}
 	if (unlikely(wo->wo_flags & WNOWAIT)) {
 		status = (p->signal->flags & SIGNAL_GROUP_EXIT)
 			? p->signal->group_exit_code : p->exit_code;
@@ -1121,7 +1127,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		release_task(p);
 
 out_info:
-	infop = wo->wo_info;
 	if (infop) {
 		if ((status & 0x7f) == 0) {
 			infop->cause = CLD_EXITED;
@@ -1564,7 +1569,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 	unsigned int f_flags = 0;
 
 	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
-			__WNOTHREAD|__WCLONE|__WALL))
+			__WNOTHREAD|__WCLONE|__WALL|__WCHILDSIGINFO))
 		return -EINVAL;
 	if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
 		return -EINVAL;
@@ -1637,6 +1642,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 	if (!infop)
 		return err;
 
+	/* __WCHILDSIGINFO */
+	if (info.siginfo.si_signo)
+		return copy_siginfo_to_user(infop, &info.siginfo);
+
 	if (!user_write_access_begin(infop, sizeof(*infop)))
 		return -EFAULT;
 
@@ -1780,6 +1789,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 	if (!infop)
 		return err;
 
+	/* __WCHILDSIGINFO */
+	if (info.siginfo.si_signo)
+		return copy_siginfo_to_user32(
+				(struct compat_siginfo __user *)infop,
+				&info.siginfo);
+
 	if (!user_write_access_begin(infop, sizeof(*infop)))
 		return -EFAULT;
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 9b04631acde8..41f6ba6b7aa7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
 		}
 
 	fatal:
+		/* Allow siginfo to be queried until reaped. */
+		copy_siginfo(&current->death_siginfo, &ksig->info);
+		current->last_siginfo = &current->death_siginfo;
+
 		spin_unlock_irq(&sighand->siglock);
 		if (unlikely(cgroup_task_frozen(current)))
 			cgroup_leave_frozen(true);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] selftests/seccomp: Check for waitid() behavior
  2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
  2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
@ 2022-02-14 21:38 ` Kees Cook
  1 sibling, 0 replies; 4+ messages in thread
From: Kees Cook @ 2022-02-14 21:38 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Kees Cook, Christian Brauner, Andy Lutomirski,
	Robert Święcki, Jann Horn, Oleg Nesterov,
	Thomas Gleixner, Ingo Molnar, linux-kernel, linux-api,
	linux-hardening

Verify we can fetch child's siginfo_t from waitid() with __WCHILDSIGINFO.
Skip if it's not available.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 9d126d7fabdb..0c803c2b450e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -268,6 +268,14 @@ struct seccomp_notif_addfd_big {
 #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
 #endif
 
+#ifndef SYS_SECCOMP
+#define SYS_SECCOMP	1
+#endif
+
+#ifndef __WCHILDSIGINFO
+#define __WCHILDSIGINFO	0x10000000
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -765,6 +773,128 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 	close(fd);
 }
 
+FIXTURE(SIGINFO) {
+	pid_t child_pid;
+};
+
+FIXTURE_SETUP(SIGINFO)
+{
+	self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(SIGINFO)
+{
+	if (self->child_pid > 0)
+		waitpid(self->child_pid, NULL, WNOHANG);
+}
+
+TEST_F(SIGINFO, child)
+{
+	int status;
+	siginfo_t info = { };
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS | 0xBA),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	self->child_pid = fork();
+	ASSERT_LE(0, self->child_pid);
+	if (self->child_pid == 0) {
+		ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+		}
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		/* Should have died now. */
+		_exit(37);
+	}
+
+	/* Check siginfo_t contents. */
+	EXPECT_EQ(waitid(P_PID, self->child_pid, &info, WEXITED | WNOWAIT), 0);
+#if 0
+	struct {
+		int si_signo;
+		int si_code;
+		int si_errno;
+		union __sifields _sifields;
+	}
+
+	/* SIGCHLD */
+	struct {
+		__kernel_pid_t _pid;	/* which child */
+		__kernel_uid32_t _uid;	/* sender's uid */
+		int _status;		/* exit code */
+		__ARCH_SI_CLOCK_T _utime;
+		__ARCH_SI_CLOCK_T _stime;
+	} _sigchld;
+#endif
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	EXPECT_TRUE(info.si_code == CLD_KILLED || info.si_code == CLD_DUMPED);
+	EXPECT_TRUE(info.si_errno == 0);
+	EXPECT_EQ(info.si_pid, self->child_pid);
+
+	ASSERT_TRUE(WIFSIGNALED(info.si_status));
+	/* TODO: why doesn't this WCOREDUMP() agree with below? */
+	/* EXPECT_TRUE(WCOREDUMP(status)); */
+	EXPECT_EQ(WTERMSIG(info.si_status), SIGSYS);
+
+
+	memset(&info, 0, sizeof(info));
+	status = waitid(P_PID, self->child_pid, &info,
+			WEXITED | WNOWAIT | __WCHILDSIGINFO);
+	EXPECT_EQ(status, 0) {
+		if (status < 0 && errno == EINVAL)
+			SKIP(goto skip_siginfo, "Kernel does not support waitid() with __WCHILDSIGINFO");
+	}
+#if 0
+	/* SIGSYS */
+	struct {
+		void __user *_call_addr;/* calling user insn */
+		int _syscall;		/* triggering system call number */
+		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+	} _sigsys;
+
+	info.si_signo = SIGSYS;
+	info.si_code = SYS_SECCOMP;
+	info.si_call_addr = (void __user *)KSTK_EIP(current);
+	info.si_errno = reason;
+	info.si_arch = syscall_get_arch(current);
+	info.si_syscall = syscall;
+
+#endif
+	ASSERT_EQ(info.si_signo, SIGSYS);
+	EXPECT_EQ(info.si_code, SYS_SECCOMP);
+	/* EXPECT_EQ(info.si_arch, ...native arch...); */
+	EXPECT_EQ(info.si_syscall, __NR_prctl);
+	/*
+	 * The syscall will have happened somewhere near the libc
+	 * prctl implementation.
+	 */
+	EXPECT_TRUE(info.si_call_addr >= (void *)prctl &&
+		    info.si_call_addr <= (void *)prctl + PAGE_SIZE) {
+		TH_LOG("info.si_call_addr: %p", info.si_call_addr);
+		TH_LOG("prctl            : %p", prctl);
+	}
+	EXPECT_EQ(info.si_errno, 0xBA);
+
+skip_siginfo:
+	/* Check status contents. */
+	ASSERT_EQ(waitpid(self->child_pid, &status, 0), self->child_pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	/* TODO: why doesn't this WCOREDUMP() agree with above? */
+	/* EXPECT_TRUE(WCOREDUMP(status)); */
+	EXPECT_EQ(WTERMSIG(status), SIGSYS);
+	self->child_pid = 0;
+}
+
 /* This is a thread task to die via seccomp filter violation. */
 void *kill_thread(void *data)
 {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] exit: Introduce __WCHILDSIGINFO for waitid
  2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
@ 2022-02-15  9:14   ` Christian Brauner
  0 siblings, 0 replies; 4+ messages in thread
From: Christian Brauner @ 2022-02-15  9:14 UTC (permalink / raw)
  To: Kees Cook
  Cc: Eric W. Biederman, Andy Lutomirski, Robert Święcki,
	Jann Horn, Oleg Nesterov, Thomas Gleixner, linux-api,
	Ingo Molnar, linux-kernel, linux-hardening

On Mon, Feb 14, 2022 at 01:38:22PM -0800, Kees Cook wrote:
> When __WCHILDSIGINFO is specified, copy the child's siginfo into the
> waitid infop instead of constructing the parent's SIGCHLD perspective.
> 
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: "Eric W. Biederman" <ebiederm@xmission.com>
> Cc: Andy Lutomirski <luto@amacapital.net>
> Cc: "Robert Święcki" <robert@swiecki.net>
> Cc: Jann Horn <jannh@google.com>
> Cc: Oleg Nesterov <oleg@redhat.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: linux-api@vger.kernel.org
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
>  include/linux/sched.h     |  1 +
>  include/uapi/linux/wait.h |  1 +
>  kernel/exit.c             | 23 +++++++++++++++++++----
>  kernel/signal.c           |  4 ++++
>  4 files changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index f5b2be39a78c..e40789e801ef 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1178,6 +1178,7 @@ struct task_struct {
>  #endif
>  	/* Ptrace state: */
>  	unsigned long			ptrace_message;
> +	kernel_siginfo_t		death_siginfo;
>  	kernel_siginfo_t		*last_siginfo;
>  
>  	struct task_io_accounting	ioac;
> diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
> index 85b809fc9f11..7258cd4510ba 100644
> --- a/include/uapi/linux/wait.h
> +++ b/include/uapi/linux/wait.h
> @@ -9,6 +9,7 @@
>  #define WCONTINUED	0x00000008
>  #define WNOWAIT		0x01000000	/* Don't reap, just poll status.  */
>  
> +#define __WCHILDSIGINFO	0x10000000	/* Report child's siginfo. */
>  #define __WNOTHREAD	0x20000000	/* Don't wait on children of other threads in this group */
>  #define __WALL		0x40000000	/* Wait on all children, regardless of type */
>  #define __WCLONE	0x80000000	/* Wait only on non-SIGCHLD children */
> diff --git a/kernel/exit.c b/kernel/exit.c
> index b00a25bb4ab9..de6e024976c6 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -953,6 +953,7 @@ struct waitid_info {
>  	uid_t uid;
>  	int status;
>  	int cause;
> +	kernel_siginfo_t siginfo;

Heh, I once got yelled at because I put siginfo in there... :)
If this time it's acceptable it might be worth considering replacing
some of the duplicated fields if possible. But again, on a suboptimal
device because of vacation.

>  };
>  
>  struct wait_opts {
> @@ -964,7 +965,7 @@ struct wait_opts {
>  	int			wo_stat;
>  	struct rusage		*wo_rusage;
>  
> -	wait_queue_entry_t		child_wait;
> +	wait_queue_entry_t	child_wait;
>  	int			notask_error;
>  };
>  
> @@ -1012,11 +1013,16 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
>  	int state, status;
>  	pid_t pid = task_pid_vnr(p);
>  	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
> -	struct waitid_info *infop;
> +	struct waitid_info *infop = wo->wo_info;
>  
>  	if (!likely(wo->wo_flags & WEXITED))
>  		return 0;
>  
> +	/* Before WNOWAIT so a copy can be extracted without reaping. */
> +	if (unlikely(wo->wo_flags & __WCHILDSIGINFO)) {
> +		if (infop && p->last_siginfo)
> +			copy_siginfo(&infop->siginfo, p->last_siginfo);
> +	}
>  	if (unlikely(wo->wo_flags & WNOWAIT)) {
>  		status = (p->signal->flags & SIGNAL_GROUP_EXIT)
>  			? p->signal->group_exit_code : p->exit_code;
> @@ -1121,7 +1127,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
>  		release_task(p);
>  
>  out_info:
> -	infop = wo->wo_info;
>  	if (infop) {
>  		if ((status & 0x7f) == 0) {
>  			infop->cause = CLD_EXITED;
> @@ -1564,7 +1569,7 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
>  	unsigned int f_flags = 0;
>  
>  	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
> -			__WNOTHREAD|__WCLONE|__WALL))
> +			__WNOTHREAD|__WCLONE|__WALL|__WCHILDSIGINFO))
>  		return -EINVAL;
>  	if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
>  		return -EINVAL;
> @@ -1637,6 +1642,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
>  	if (!infop)
>  		return err;
>  
> +	/* __WCHILDSIGINFO */
> +	if (info.siginfo.si_signo)
> +		return copy_siginfo_to_user(infop, &info.siginfo);
> +
>  	if (!user_write_access_begin(infop, sizeof(*infop)))
>  		return -EFAULT;
>  
> @@ -1780,6 +1789,12 @@ COMPAT_SYSCALL_DEFINE5(waitid,
>  	if (!infop)
>  		return err;
>  
> +	/* __WCHILDSIGINFO */
> +	if (info.siginfo.si_signo)
> +		return copy_siginfo_to_user32(
> +				(struct compat_siginfo __user *)infop,
> +				&info.siginfo);
> +

Just to repeat what I said in the earlier thread: if I'm not misreading
this - I have to do this on a suboptimal device because of vacation -
then there's a behavioral change for what fields are initialized and
what additional fields (if any) are zeroed between a __WCHILDSIGINFO
request and a regular waitid() request. I'm a) not sure we want this and
b) we should document it.

>  	if (!user_write_access_begin(infop, sizeof(*infop)))
>  		return -EFAULT;
>  
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 9b04631acde8..41f6ba6b7aa7 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -2825,6 +2825,10 @@ bool get_signal(struct ksignal *ksig)
>  		}
>  
>  	fatal:
> +		/* Allow siginfo to be queried until reaped. */
> +		copy_siginfo(&current->death_siginfo, &ksig->info);
> +		current->last_siginfo = &current->death_siginfo;
> +
>  		spin_unlock_irq(&sighand->siglock);
>  		if (unlikely(cgroup_task_frozen(current)))
>  			cgroup_leave_frozen(true);
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-02-15  9:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-14 21:38 [PATCH 0/2] exit: Introduce __WCHILDSIGINFO for waitid Kees Cook
2022-02-14 21:38 ` [PATCH 1/2] " Kees Cook
2022-02-15  9:14   ` Christian Brauner
2022-02-14 21:38 ` [PATCH 2/2] selftests/seccomp: Check for waitid() behavior Kees Cook

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).