All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kees Cook <keescook@chromium.org>
To: Tycho Andersen <tycho@tycho.ws>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Linux Containers <containers@lists.linux-foundation.org>,
	Linux API <linux-api@vger.kernel.org>,
	Andy Lutomirski <luto@amacapital.net>,
	Oleg Nesterov <oleg@redhat.com>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	"Serge E . Hallyn" <serge@hallyn.com>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Tyler Hicks <tyhicks@canonical.com>,
	Akihiro Suda <suda.akihiro@lab.ntt.co.jp>,
	Jann Horn <jannh@google.com>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH v7 6/6] samples: add an example of seccomp user trap
Date: Thu, 27 Sep 2018 15:11:27 -0700	[thread overview]
Message-ID: <CAGXu5jJG0RNPabcuiRa7g3xU+mDjXj6qjQCNrKcOooEfCy2aCg@mail.gmail.com> (raw)
In-Reply-To: <20180927151119.9989-7-tycho@tycho.ws>

On Thu, Sep 27, 2018 at 8:11 AM, Tycho Andersen <tycho@tycho.ws> wrote:
> The idea here is just to give a demonstration of how one could safely use
> the SECCOMP_RET_USER_NOTIF feature to do mount policies. This particular
> policy is (as noted in the comment) not very interesting, but it serves to
> illustrate how one might apply a policy dodging the various TOCTOU issues.
>
> v5: new in v5
> v7: updates for v7 API changes
>
> Signed-off-by: Tycho Andersen <tycho@tycho.ws>
> CC: Kees Cook <keescook@chromium.org>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Eric W. Biederman <ebiederm@xmission.com>
> CC: "Serge E. Hallyn" <serge@hallyn.com>
> CC: Christian Brauner <christian.brauner@ubuntu.com>
> CC: Tyler Hicks <tyhicks@canonical.com>
> CC: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
> ---
>  samples/seccomp/.gitignore  |   1 +
>  samples/seccomp/Makefile    |   7 +-
>  samples/seccomp/user-trap.c | 312 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 319 insertions(+), 1 deletion(-)
>
> diff --git a/samples/seccomp/.gitignore b/samples/seccomp/.gitignore
> index 78fb78184291..d1e2e817d556 100644
> --- a/samples/seccomp/.gitignore
> +++ b/samples/seccomp/.gitignore
> @@ -1,3 +1,4 @@
>  bpf-direct
>  bpf-fancy
>  dropper
> +user-trap
> diff --git a/samples/seccomp/Makefile b/samples/seccomp/Makefile
> index cf34ff6b4065..4920903c8009 100644
> --- a/samples/seccomp/Makefile
> +++ b/samples/seccomp/Makefile
> @@ -1,6 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  ifndef CROSS_COMPILE
> -hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct
> +hostprogs-$(CONFIG_SAMPLE_SECCOMP) := bpf-fancy dropper bpf-direct user-trap
>
>  HOSTCFLAGS_bpf-fancy.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_bpf-fancy.o += -idirafter $(objtree)/include
> @@ -16,6 +16,10 @@ HOSTCFLAGS_bpf-direct.o += -I$(objtree)/usr/include
>  HOSTCFLAGS_bpf-direct.o += -idirafter $(objtree)/include
>  bpf-direct-objs := bpf-direct.o
>
> +HOSTCFLAGS_user-trap.o += -I$(objtree)/usr/include
> +HOSTCFLAGS_user-trap.o += -idirafter $(objtree)/include
> +user-trap-objs := user-trap.o
> +
>  # Try to match the kernel target.
>  ifndef CONFIG_64BIT
>
> @@ -33,6 +37,7 @@ HOSTCFLAGS_bpf-fancy.o += $(MFLAG)
>  HOSTLDLIBS_bpf-direct += $(MFLAG)
>  HOSTLDLIBS_bpf-fancy += $(MFLAG)
>  HOSTLDLIBS_dropper += $(MFLAG)
> +HOSTLDLIBS_user-trap += $(MFLAG)
>  endif
>  always := $(hostprogs-m)
>  endif
> diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
> new file mode 100644
> index 000000000000..63c9a5994dc1
> --- /dev/null
> +++ b/samples/seccomp/user-trap.c
> @@ -0,0 +1,312 @@
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <string.h>
> +#include <stddef.h>
> +#include <sys/sysmacros.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/socket.h>
> +#include <sys/stat.h>
> +#include <sys/mman.h>
> +#include <sys/syscall.h>
> +#include <sys/user.h>
> +#include <sys/ioctl.h>
> +#include <sys/ptrace.h>
> +#include <sys/mount.h>
> +#include <linux/limits.h>
> +#include <linux/filter.h>
> +#include <linux/seccomp.h>
> +
> +/*
> + * Because of some grossness, we can't include linux/ptrace.h here, so we
> + * re-define PTRACE_SECCOMP_NEW_LISTENER.
> + */
> +#ifndef PTRACE_SECCOMP_NEW_LISTENER
> +#define PTRACE_SECCOMP_NEW_LISTENER    0x420e
> +#endif
> +
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
> +
> +static int seccomp(unsigned int op, unsigned int flags, void *args)
> +{
> +       errno = 0;
> +       return syscall(__NR_seccomp, op, flags, args);
> +}
> +
> +static int user_trap_syscall(int nr, unsigned int flags)
> +{
> +       struct sock_filter filter[] = {
> +               BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
> +                       offsetof(struct seccomp_data, nr)),
> +               BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
> +               BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
> +               BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
> +       };
> +
> +       struct sock_fprog prog = {
> +               .len = (unsigned short)ARRAY_SIZE(filter),
> +               .filter = filter,
> +       };
> +
> +       return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
> +}
> +
> +static int handle_req(struct seccomp_notif *req,
> +                     struct seccomp_notif_resp *resp, int listener)
> +{
> +       char path[PATH_MAX], source[PATH_MAX], target[PATH_MAX];
> +       int ret = -1, mem;
> +
> +       resp->len = sizeof(*resp);
> +       resp->id = req->id;
> +       resp->error = -EPERM;
> +       resp->val = 0;
> +
> +       if (req->data.nr != __NR_mount) {
> +               fprintf(stderr, "huh? trapped something besides mknod? %d\n", req->data.nr);
> +               return -1;
> +       }
> +
> +       /* Only allow bind mounts. */
> +       if (!(req->data.args[3] & MS_BIND))
> +               return 0;
> +
> +       /*
> +        * Ok, let's read the task's memory to see where they wanted their
> +        * mount to go.
> +        */
> +       snprintf(path, sizeof(path), "/proc/%d/mem", req->pid);
> +       mem = open(path, O_RDONLY);
> +       if (mem < 0) {
> +               perror("open mem");
> +               return -1;
> +       }
> +
> +       /*
> +        * Now we avoid a TOCTOU: we referred to a pid by its pid, but since
> +        * the pid that made the syscall may have died, we need to confirm that
> +        * the pid is still valid after we open its /proc/pid/mem file. We can
> +        * ask the listener fd this as follows.
> +        *
> +        * Note that this check should occur *after* any task-specific
> +        * resources are opened, to make sure that the task has not died and
> +        * we're not wrongly reading someone else's state in order to make
> +        * decisions.
> +        */
> +       if (ioctl(listener, SECCOMP_NOTIF_ID_VALID, &req->id) < 0) {
> +               fprintf(stderr, "task died before we could map its memory\n");
> +               goto out;
> +       }
> +
> +       /*
> +        * Phew, we've got the right /proc/pid/mem. Now we can read it. Note
> +        * that to avoid another TOCTOU, we should read all of the pointer args
> +        * before we decide to allow the syscall.
> +        */
> +       if (lseek(mem, req->data.args[0], SEEK_SET) < 0) {
> +               perror("seek");
> +               goto out;
> +       }
> +
> +       ret = read(mem, source, sizeof(source));
> +       if (ret < 0) {
> +               perror("read");
> +               goto out;
> +       }
> +
> +       if (lseek(mem, req->data.args[1], SEEK_SET) < 0) {
> +               perror("seek");
> +               goto out;
> +       }
> +
> +       ret = read(mem, target, sizeof(target));
> +       if (ret < 0) {
> +               perror("read");
> +               goto out;
> +       }
> +
> +       /*
> +        * Our policy is to only allow bind mounts inside /tmp. This isn't very
> +        * interesting, because we could do unprivlieged bind mounts with user
> +        * namespaces already, but you get the idea.
> +        */
> +       if (!strncmp(source, "/tmp", 4) && !strncmp(target, "/tmp", 4)) {
> +               if (mount(source, target, NULL, req->data.args[3], NULL) < 0) {
> +                       ret = -1;
> +                       perror("actual mount");
> +                       goto out;
> +               }
> +               resp->error = 0;
> +       }
> +
> +       /* Even if we didn't allow it because of policy, generating the
> +        * response was be a success, because we want to tell the worker EPERM.
> +        */
> +       ret = 0;
> +
> +out:
> +       close(mem);
> +       return ret;
> +}
> +
> +int main(void)
> +{
> +       int sk_pair[2], ret = 1, status, listener;
> +       pid_t worker = 0 , tracer = 0;
> +       char c;
> +
> +       if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair) < 0) {
> +               perror("socketpair");
> +               return 1;
> +       }
> +
> +       worker = fork();
> +       if (worker < 0) {
> +               perror("fork");
> +               goto close_pair;
> +       }
> +
> +       if (worker == 0) {
> +               if (user_trap_syscall(__NR_mount, 0) < 0) {
> +                       perror("seccomp");
> +                       exit(1);
> +               }
> +
> +               if (setuid(1000) < 0) {
> +                       perror("setuid");
> +                       exit(1);
> +               }
> +
> +               if (write(sk_pair[1], "a", 1) != 1) {
> +                       perror("write");
> +                       exit(1);
> +               }
> +
> +               if (read(sk_pair[1], &c, 1) != 1) {
> +                       perror("write");
> +                       exit(1);
> +               }
> +
> +               if (mkdir("/tmp/foo", 0755) < 0) {
> +                       perror("mkdir");
> +                       exit(1);
> +               }
> +
> +               if (mount("/dev/sda", "/tmp/foo", NULL, 0, NULL) != -1) {
> +                       fprintf(stderr, "huh? mounted /dev/sda?\n");
> +                       exit(1);
> +               }
> +
> +               if (errno != EPERM) {
> +                       perror("bad error from mount");
> +                       exit(1);
> +               }
> +
> +               if (mount("/tmp/foo", "/tmp/foo", NULL, MS_BIND, NULL) < 0) {
> +                       perror("mount");
> +                       exit(1);
> +               }
> +
> +               exit(0);
> +       }
> +
> +       if (read(sk_pair[0], &c, 1) != 1) {
> +               perror("read ready signal");
> +               goto out_kill;
> +       }
> +
> +       if (ptrace(PTRACE_ATTACH, worker) < 0) {
> +               perror("ptrace");
> +               goto out_kill;
> +       }
> +
> +       if (waitpid(worker, NULL, 0) != worker) {
> +               perror("waitpid");
> +               goto out_kill;
> +       }
> +
> +       listener = ptrace(PTRACE_SECCOMP_NEW_LISTENER, worker, 0);
> +       if (listener < 0) {
> +               perror("ptrace get listener");
> +               goto out_kill;
> +       }
> +
> +       if (ptrace(PTRACE_DETACH, worker, NULL, 0) < 0) {
> +               perror("ptrace detach");
> +               goto out_kill;
> +       }
> +
> +       if (write(sk_pair[0], "a", 1) != 1) {
> +               perror("write");
> +               exit(1);
> +       }
> +
> +       tracer = fork();
> +       if (tracer < 0) {
> +               perror("fork");
> +               goto out_kill;
> +       }
> +
> +       if (tracer == 0) {
> +               while (1) {
> +                       struct seccomp_notif req = {};
> +                       struct seccomp_notif_resp resp = {};
> +
> +                       req.len = sizeof(req);
> +                       if (ioctl(listener, SECCOMP_NOTIF_RECV, &req) != sizeof(req)) {
> +                               perror("ioctl recv");
> +                               goto out_close;
> +                       }
> +
> +                       if (handle_req(&req, &resp, listener) < 0)
> +                               goto out_close;
> +
> +                       if (ioctl(listener, SECCOMP_NOTIF_SEND, &resp) != sizeof(resp)) {
> +                               perror("ioctl send");
> +                               goto out_close;
> +                       }
> +               }
> +out_close:
> +               close(listener);
> +               exit(1);
> +       }
> +
> +       close(listener);
> +
> +       if (waitpid(worker, &status, 0) != worker) {
> +               perror("waitpid");
> +               goto out_kill;
> +       }
> +
> +       if (umount2("/tmp/foo", MNT_DETACH) < 0 && errno != EINVAL) {
> +               perror("umount2");
> +               goto out_kill;
> +       }
> +
> +       if (remove("/tmp/foo") < 0 && errno != ENOENT) {
> +               perror("remove");
> +               exit(1);
> +       }
> +
> +       if (!WIFEXITED(status) || WEXITSTATUS(status)) {
> +               fprintf(stderr, "worker exited nonzero\n");
> +               goto out_kill;
> +       }
> +
> +       ret = 0;
> +
> +out_kill:
> +       if (tracer > 0)
> +               kill(tracer, SIGKILL);
> +       if (worker > 0)
> +               kill(worker, SIGKILL);
> +
> +close_pair:
> +       close(sk_pair[0]);
> +       close(sk_pair[1]);
> +       return ret;
> +}
> --
> 2.17.1
>

handle_req() is well commented, but main() isn't. Since this is
explicitly a "sample", can you add operational comments to main() as
well? I think it might help people follow what is happening (and what
is expected) during main().

Beyond that, yay! Samples! :)

-Kees

-- 
Kees Cook
Pixel Security

  reply	other threads:[~2018-09-27 22:11 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-27 15:11 [PATCH v7 0/6] seccomp trap to userspace Tycho Andersen
2018-09-27 15:11 ` [PATCH v7 1/6] seccomp: add a return code to " Tycho Andersen
2018-09-27 21:31   ` Kees Cook
2018-09-27 22:48     ` Tycho Andersen
2018-09-27 22:48       ` Tycho Andersen
2018-09-27 23:10       ` Kees Cook
2018-09-28 14:39         ` Tycho Andersen
2018-10-08 14:58       ` Christian Brauner
2018-10-09 14:28         ` Tycho Andersen
2018-10-09 16:24           ` Christian Brauner
2018-10-09 16:29             ` Tycho Andersen
2018-10-17 20:29     ` Tycho Andersen
2018-10-17 22:21       ` Kees Cook
2018-10-17 22:33         ` Tycho Andersen
2018-10-21 16:04         ` Tycho Andersen
2018-10-22  9:42           ` Christian Brauner
2018-09-27 21:51   ` Jann Horn
2018-09-27 22:45     ` Kees Cook
2018-09-27 23:08       ` Tycho Andersen
2018-09-27 23:04     ` Tycho Andersen
2018-09-27 23:37       ` Jann Horn
2018-09-29  0:28   ` Aleksa Sarai
2018-09-27 15:11 ` [PATCH v7 2/6] seccomp: make get_nth_filter available outside of CHECKPOINT_RESTORE Tycho Andersen
2018-09-27 16:51   ` Jann Horn
2018-09-27 21:42   ` Kees Cook
2018-10-08 13:55   ` Christian Brauner
2018-09-27 15:11 ` [PATCH v7 3/6] seccomp: add a way to get a listener fd from ptrace Tycho Andersen
2018-09-27 16:20   ` Jann Horn
2018-09-27 16:34     ` Tycho Andersen
2018-09-27 17:35   ` Jann Horn
2018-09-27 18:09     ` Tycho Andersen
2018-09-27 21:53   ` Kees Cook
2018-10-08 15:16   ` Christian Brauner
2018-10-08 15:33     ` Jann Horn
2018-10-08 16:21       ` Christian Brauner
2018-10-08 16:42         ` Jann Horn
2018-10-08 18:18           ` Christian Brauner
2018-10-09 12:39             ` Jann Horn
2018-10-09 13:28               ` Christian Brauner
2018-10-09 13:36                 ` Jann Horn
2018-10-09 13:49                   ` Christian Brauner
2018-10-09 13:50                     ` Jann Horn
2018-10-09 14:09                       ` Christian Brauner
2018-10-09 15:26                         ` Jann Horn
2018-10-09 16:20                           ` Christian Brauner
2018-10-09 16:26                             ` Jann Horn
2018-10-10 12:54                               ` Christian Brauner
2018-10-10 13:09                                 ` Christian Brauner
2018-10-10 13:10                                 ` Jann Horn
2018-10-10 13:18                                   ` Christian Brauner
2018-10-10 15:31                   ` Paul Moore
2018-10-10 15:33                     ` Jann Horn
2018-10-10 15:39                       ` Christian Brauner
2018-10-10 16:54                         ` Tycho Andersen
2018-10-10 17:15                           ` Christian Brauner
2018-10-10 17:26                             ` Tycho Andersen
2018-10-10 18:28                               ` Christian Brauner
2018-10-11  7:24                       ` Paul Moore
2018-10-11  7:24                         ` Paul Moore
2018-10-11 13:39                         ` Jann Horn
2018-10-11 23:10                           ` Paul Moore
2018-10-11 23:10                             ` Paul Moore
2018-10-12  1:02                             ` Andy Lutomirski
2018-10-12 20:02                               ` Tycho Andersen
2018-10-12 20:06                                 ` Jann Horn
2018-10-12 20:06                                   ` Jann Horn
2018-10-12 20:11                                 ` Christian Brauner
2018-10-08 18:00     ` Tycho Andersen
2018-10-08 18:41       ` Christian Brauner
2018-10-10 17:45       ` Andy Lutomirski
2018-10-10 18:26         ` Christian Brauner
2018-09-27 15:11 ` [PATCH v7 4/6] files: add a replace_fd_files() function Tycho Andersen
2018-09-27 16:49   ` Jann Horn
2018-09-27 18:04     ` Tycho Andersen
2018-09-27 21:59   ` Kees Cook
2018-09-28  2:20     ` Kees Cook
2018-09-28  2:46       ` Jann Horn
2018-09-28  5:23       ` Tycho Andersen
2018-09-27 15:11 ` [PATCH v7 5/6] seccomp: add a way to pass FDs via a notification fd Tycho Andersen
2018-09-27 16:39   ` Jann Horn
2018-09-27 22:13     ` Tycho Andersen
2018-09-27 19:28   ` Jann Horn
2018-09-27 22:14     ` Tycho Andersen
2018-09-27 22:17       ` Jann Horn
2018-09-27 22:49         ` Tycho Andersen
2018-09-27 22:09   ` Kees Cook
2018-09-27 22:15     ` Tycho Andersen
2018-09-27 15:11 ` [PATCH v7 6/6] samples: add an example of seccomp user trap Tycho Andersen
2018-09-27 22:11   ` Kees Cook [this message]
2018-09-28 21:57 ` [PATCH v7 0/6] seccomp trap to userspace Michael Kerrisk (man-opages)
2018-09-28 22:03   ` Tycho Andersen
2018-09-28 22:16     ` Michael Kerrisk (man-pages)
2018-09-28 22:34       ` Kees Cook
2018-09-28 22:46         ` Michael Kerrisk (man-pages)
2018-09-28 22:48           ` Jann Horn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAGXu5jJG0RNPabcuiRa7g3xU+mDjXj6qjQCNrKcOooEfCy2aCg@mail.gmail.com \
    --to=keescook@chromium.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=jannh@google.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=oleg@redhat.com \
    --cc=serge@hallyn.com \
    --cc=suda.akihiro@lab.ntt.co.jp \
    --cc=tycho@tycho.ws \
    --cc=tyhicks@canonical.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.