All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tycho Andersen <tycho@tycho.ws>
To: Kees Cook <keescook@chromium.org>
Cc: linux-kernel@vger.kernel.org,
	containers@lists.linux-foundation.org, linux-api@vger.kernel.org,
	Andy Lutomirski <luto@amacapital.net>,
	Oleg Nesterov <oleg@redhat.com>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	"Serge E . Hallyn" <serge@hallyn.com>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Tyler Hicks <tyhicks@canonical.com>,
	Akihiro Suda <suda.akihiro@lab.ntt.co.jp>,
	Jann Horn <jannh@google.com>,
	linux-fsdevel@vger.kernel.org, Tycho Andersen <tycho@tycho.ws>
Subject: [PATCH v7 6/6] samples: add an example of seccomp user trap
Date: Thu, 27 Sep 2018 09:11:19 -0600	[thread overview]
Message-ID: <20180927151119.9989-7-tycho@tycho.ws> (raw)
In-Reply-To: <20180927151119.9989-1-tycho@tycho.ws>

The idea here is just to give a demonstration of how one could safely use
the SECCOMP_RET_USER_NOTIF feature to do mount policies. This particular
policy is (as noted in the comment) not very interesting, but it serves to
illustrate how one might apply a policy dodging the various TOCTOU issues.

v5: new in v5
v7: updates for v7 API changes

Signed-off-by: Tycho Andersen <tycho@tycho.ws>
CC: Kees Cook <keescook@chromium.org>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Oleg Nesterov <oleg@redhat.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: "Serge E. Hallyn" <serge@hallyn.com>
CC: Christian Brauner <christian.brauner@ubuntu.com>
CC: Tyler Hicks <tyhicks@canonical.com>
CC: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
---
 samples/seccomp/.gitignore  |   1 +
 samples/seccomp/Makefile    |   7 +-
 samples/seccomp/user-trap.c | 312 ++++++++++++++++++++++++++++++++++++
 3 files changed, 319 insertions(+), 1 deletion(-)

diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
index 78fb78184291..d1e2e817d556 100644
--- a/samples/seccomp/.gitignore
+++ b/samples/seccomp/.gitignore
@@ -1,3 +1,4 @@
 bpf-direct
 bpf-fancy
 dropper
+user-trap
diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
index cf34ff6b4065..4920903c8009 100644
--- a/samples/seccomp/Makefile
+++ b/samples/seccomp/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 ifndef CROSS_COMPILE
-hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
+hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct user-trap
 
 HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
 HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
@@ -16,6 +16,10 @@ HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
 HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
 bpf-direct-objs := bpf-direct.o
 
+HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
+HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
+user-trap-objs := user-trap.o
+
 # Try to match the kernel target.
 ifndef CONFIG_64BIT
 
@@ -33,6 +37,7 @@ HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
 HOSTLDLIBS_bpf-direct += $(MFLAG)
 HOSTLDLIBS_bpf-fancy += $(MFLAG)
 HOSTLDLIBS_dropper += $(MFLAG)
+HOSTLDLIBS_user-trap += $(MFLAG)
 endif
 always := $(hostprogs-m)
 endif
diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
new file mode 100644
index 000000000000..63c9a5994dc1
--- /dev/null
+++ b/samples/seccomp/user-trap.c
@@ -0,0 +1,312 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/ioctl.h>
+#include <sys/ptrace.h>
+#include <sys/mount.h>
+#include <linux/limits.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+
+/*
+ * Because of some grossness, we can't include linux/ptrace.h here, so we
+ * re-define PTRACE_SECCOMP_NEW_LISTENER.
+ */
+#ifndef PTRACE_SECCOMP_NEW_LISTENER
+#define PTRACE_SECCOMP_NEW_LISTENER	0x420e
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_trap_syscall(int nr, unsigned int flags)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+static int handle_req(struct seccomp_notif *req,
+		      struct seccomp_notif_resp *resp, int listener)
+{
+	char path[PATH_MAX], source[PATH_MAX], target[PATH_MAX];
+	int ret = -1, mem;
+
+	resp->len = sizeof(*resp);
+	resp->id = req->id;
+	resp->error = -EPERM;
+	resp->val = 0;
+
+	if (req->data.nr != __NR_mount) {
+		fprintf(stderr, "huh? trapped something besides mknod? %d\n", req->data.nr);
+		return -1;
+	}
+
+	/* Only allow bind mounts. */
+	if (!(req->data.args[3] & MS_BIND))
+		return 0;
+
+	/*
+	 * Ok, let's read the task's memory to see where they wanted their
+	 * mount to go.
+	 */
+	snprintf(path, sizeof(path), "/proc/%d/mem", req->pid);
+	mem = open(path, O_RDONLY);
+	if (mem < 0) {
+		perror("open mem");
+		return -1;
+	}
+
+	/*
+	 * Now we avoid a TOCTOU: we referred to a pid by its pid, but since
+	 * the pid that made the syscall may have died, we need to confirm that
+	 * the pid is still valid after we open its /proc/pid/mem file. We can
+	 * ask the listener fd this as follows.
+	 *
+	 * Note that this check should occur *after* any task-specific
+	 * resources are opened, to make sure that the task has not died and
+	 * we're not wrongly reading someone else's state in order to make
+	 * decisions.
+	 */
+	if (ioctl(listener, SECCOMP_NOTIF_ID_VALID, &req->id) < 0) {
+		fprintf(stderr, "task died before we could map its memory\n");
+		goto out;
+	}
+
+	/*
+	 * Phew, we've got the right /proc/pid/mem. Now we can read it. Note
+	 * that to avoid another TOCTOU, we should read all of the pointer args
+	 * before we decide to allow the syscall.
+	 */
+	if (lseek(mem, req->data.args[0], SEEK_SET) < 0) {
+		perror("seek");
+		goto out;
+	}
+
+	ret = read(mem, source, sizeof(source));
+	if (ret < 0) {
+		perror("read");
+		goto out;
+	}
+
+	if (lseek(mem, req->data.args[1], SEEK_SET) < 0) {
+		perror("seek");
+		goto out;
+	}
+
+	ret = read(mem, target, sizeof(target));
+	if (ret < 0) {
+		perror("read");
+		goto out;
+	}
+
+	/*
+	 * Our policy is to only allow bind mounts inside /tmp. This isn't very
+	 * interesting, because we could do unprivlieged bind mounts with user
+	 * namespaces already, but you get the idea.
+	 */
+	if (!strncmp(source, "/tmp", 4) && !strncmp(target, "/tmp", 4)) {
+		if (mount(source, target, NULL, req->data.args[3], NULL) < 0) {
+			ret = -1;
+			perror("actual mount");
+			goto out;
+		}
+		resp->error = 0;
+	}
+
+	/* Even if we didn't allow it because of policy, generating the
+	 * response was be a success, because we want to tell the worker EPERM.
+	 */
+	ret = 0;
+
+out:
+	close(mem);
+	return ret;
+}
+
+int main(void)
+{
+	int sk_pair[2], ret = 1, status, listener;
+	pid_t worker = 0 , tracer = 0;
+	char c;
+
+	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair) < 0) {
+		perror("socketpair");
+		return 1;
+	}
+
+	worker = fork();
+	if (worker < 0) {
+		perror("fork");
+		goto close_pair;
+	}
+
+	if (worker == 0) {
+		if (user_trap_syscall(__NR_mount, 0) < 0) {
+			perror("seccomp");
+			exit(1);
+		}
+
+		if (setuid(1000) < 0) {
+			perror("setuid");
+			exit(1);
+		}
+
+		if (write(sk_pair[1], "a", 1) != 1) {
+			perror("write");
+			exit(1);
+		}
+
+		if (read(sk_pair[1], &c, 1) != 1) {
+			perror("write");
+			exit(1);
+		}
+
+		if (mkdir("/tmp/foo", 0755) < 0) {
+			perror("mkdir");
+			exit(1);
+		}
+
+		if (mount("/dev/sda", "/tmp/foo", NULL, 0, NULL) != -1) {
+			fprintf(stderr, "huh? mounted /dev/sda?\n");
+			exit(1);
+		}
+
+		if (errno != EPERM) {
+			perror("bad error from mount");
+			exit(1);
+		}
+
+		if (mount("/tmp/foo", "/tmp/foo", NULL, MS_BIND, NULL) < 0) {
+			perror("mount");
+			exit(1);
+		}
+
+		exit(0);
+	}
+
+	if (read(sk_pair[0], &c, 1) != 1) {
+		perror("read ready signal");
+		goto out_kill;
+	}
+
+	if (ptrace(PTRACE_ATTACH, worker) < 0) {
+		perror("ptrace");
+		goto out_kill;
+	}
+
+	if (waitpid(worker, NULL, 0) != worker) {
+		perror("waitpid");
+		goto out_kill;
+	}
+
+	listener = ptrace(PTRACE_SECCOMP_NEW_LISTENER, worker, 0);
+	if (listener < 0) {
+		perror("ptrace get listener");
+		goto out_kill;
+	}
+
+	if (ptrace(PTRACE_DETACH, worker, NULL, 0) < 0) {
+		perror("ptrace detach");
+		goto out_kill;
+	}
+
+	if (write(sk_pair[0], "a", 1) != 1) {
+		perror("write");
+		exit(1);
+	}
+
+	tracer = fork();
+	if (tracer < 0) {
+		perror("fork");
+		goto out_kill;
+	}
+
+	if (tracer == 0) {
+		while (1) {
+			struct seccomp_notif req = {};
+			struct seccomp_notif_resp resp = {};
+
+			req.len = sizeof(req);
+			if (ioctl(listener, SECCOMP_NOTIF_RECV, &req) != sizeof(req)) {
+				perror("ioctl recv");
+				goto out_close;
+			}
+
+			if (handle_req(&req, &resp, listener) < 0)
+				goto out_close;
+
+			if (ioctl(listener, SECCOMP_NOTIF_SEND, &resp) != sizeof(resp)) {
+				perror("ioctl send");
+				goto out_close;
+			}
+		}
+out_close:
+		close(listener);
+		exit(1);
+	}
+
+	close(listener);
+
+	if (waitpid(worker, &status, 0) != worker) {
+		perror("waitpid");
+		goto out_kill;
+	}
+
+	if (umount2("/tmp/foo", MNT_DETACH) < 0 && errno != EINVAL) {
+		perror("umount2");
+		goto out_kill;
+	}
+
+	if (remove("/tmp/foo") < 0 && errno != ENOENT) {
+		perror("remove");
+		exit(1);
+	}
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status)) {
+		fprintf(stderr, "worker exited nonzero\n");
+		goto out_kill;
+	}
+
+	ret = 0;
+
+out_kill:
+	if (tracer > 0)
+		kill(tracer, SIGKILL);
+	if (worker > 0)
+		kill(worker, SIGKILL);
+
+close_pair:
+	close(sk_pair[0]);
+	close(sk_pair[1]);
+	return ret;
+}
-- 
2.17.1


  parent reply	other threads:[~2018-09-27 15:11 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-27 15:11 [PATCH v7 0/6] seccomp trap to userspace Tycho Andersen
2018-09-27 15:11 ` [PATCH v7 1/6] seccomp: add a return code to " Tycho Andersen
2018-09-27 21:31   ` Kees Cook
2018-09-27 22:48     ` Tycho Andersen
2018-09-27 22:48       ` Tycho Andersen
2018-09-27 23:10       ` Kees Cook
2018-09-28 14:39         ` Tycho Andersen
2018-10-08 14:58       ` Christian Brauner
2018-10-09 14:28         ` Tycho Andersen
2018-10-09 16:24           ` Christian Brauner
2018-10-09 16:29             ` Tycho Andersen
2018-10-17 20:29     ` Tycho Andersen
2018-10-17 22:21       ` Kees Cook
2018-10-17 22:33         ` Tycho Andersen
2018-10-21 16:04         ` Tycho Andersen
2018-10-22  9:42           ` Christian Brauner
2018-09-27 21:51   ` Jann Horn
2018-09-27 22:45     ` Kees Cook
2018-09-27 23:08       ` Tycho Andersen
2018-09-27 23:04     ` Tycho Andersen
2018-09-27 23:37       ` Jann Horn
2018-09-29  0:28   ` Aleksa Sarai
2018-09-27 15:11 ` [PATCH v7 2/6] seccomp: make get_nth_filter available outside of CHECKPOINT_RESTORE Tycho Andersen
2018-09-27 16:51   ` Jann Horn
2018-09-27 21:42   ` Kees Cook
2018-10-08 13:55   ` Christian Brauner
2018-09-27 15:11 ` [PATCH v7 3/6] seccomp: add a way to get a listener fd from ptrace Tycho Andersen
2018-09-27 16:20   ` Jann Horn
2018-09-27 16:34     ` Tycho Andersen
2018-09-27 17:35   ` Jann Horn
2018-09-27 18:09     ` Tycho Andersen
2018-09-27 21:53   ` Kees Cook
2018-10-08 15:16   ` Christian Brauner
2018-10-08 15:33     ` Jann Horn
2018-10-08 16:21       ` Christian Brauner
2018-10-08 16:42         ` Jann Horn
2018-10-08 18:18           ` Christian Brauner
2018-10-09 12:39             ` Jann Horn
2018-10-09 13:28               ` Christian Brauner
2018-10-09 13:36                 ` Jann Horn
2018-10-09 13:49                   ` Christian Brauner
2018-10-09 13:50                     ` Jann Horn
2018-10-09 14:09                       ` Christian Brauner
2018-10-09 15:26                         ` Jann Horn
2018-10-09 16:20                           ` Christian Brauner
2018-10-09 16:26                             ` Jann Horn
2018-10-10 12:54                               ` Christian Brauner
2018-10-10 13:09                                 ` Christian Brauner
2018-10-10 13:10                                 ` Jann Horn
2018-10-10 13:18                                   ` Christian Brauner
2018-10-10 15:31                   ` Paul Moore
2018-10-10 15:33                     ` Jann Horn
2018-10-10 15:39                       ` Christian Brauner
2018-10-10 16:54                         ` Tycho Andersen
2018-10-10 17:15                           ` Christian Brauner
2018-10-10 17:26                             ` Tycho Andersen
2018-10-10 18:28                               ` Christian Brauner
2018-10-11  7:24                       ` Paul Moore
2018-10-11  7:24                         ` Paul Moore
2018-10-11 13:39                         ` Jann Horn
2018-10-11 23:10                           ` Paul Moore
2018-10-11 23:10                             ` Paul Moore
2018-10-12  1:02                             ` Andy Lutomirski
2018-10-12 20:02                               ` Tycho Andersen
2018-10-12 20:06                                 ` Jann Horn
2018-10-12 20:06                                   ` Jann Horn
2018-10-12 20:11                                 ` Christian Brauner
2018-10-08 18:00     ` Tycho Andersen
2018-10-08 18:41       ` Christian Brauner
2018-10-10 17:45       ` Andy Lutomirski
2018-10-10 18:26         ` Christian Brauner
2018-09-27 15:11 ` [PATCH v7 4/6] files: add a replace_fd_files() function Tycho Andersen
2018-09-27 16:49   ` Jann Horn
2018-09-27 18:04     ` Tycho Andersen
2018-09-27 21:59   ` Kees Cook
2018-09-28  2:20     ` Kees Cook
2018-09-28  2:46       ` Jann Horn
2018-09-28  5:23       ` Tycho Andersen
2018-09-27 15:11 ` [PATCH v7 5/6] seccomp: add a way to pass FDs via a notification fd Tycho Andersen
2018-09-27 16:39   ` Jann Horn
2018-09-27 22:13     ` Tycho Andersen
2018-09-27 19:28   ` Jann Horn
2018-09-27 22:14     ` Tycho Andersen
2018-09-27 22:17       ` Jann Horn
2018-09-27 22:49         ` Tycho Andersen
2018-09-27 22:09   ` Kees Cook
2018-09-27 22:15     ` Tycho Andersen
2018-09-27 15:11 ` Tycho Andersen [this message]
2018-09-27 22:11   ` [PATCH v7 6/6] samples: add an example of seccomp user trap Kees Cook
2018-09-28 21:57 ` [PATCH v7 0/6] seccomp trap to userspace Michael Kerrisk (man-opages)
2018-09-28 22:03   ` Tycho Andersen
2018-09-28 22:16     ` Michael Kerrisk (man-pages)
2018-09-28 22:34       ` Kees Cook
2018-09-28 22:46         ` Michael Kerrisk (man-pages)
2018-09-28 22:48           ` Jann Horn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180927151119.9989-7-tycho@tycho.ws \
    --to=tycho@tycho.ws \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=jannh@google.com \
    --cc=keescook@chromium.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=oleg@redhat.com \
    --cc=serge@hallyn.com \
    --cc=suda.akihiro@lab.ntt.co.jp \
    --cc=tyhicks@canonical.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.