All of lore.kernel.org
 help / color / mirror / Atom feed
From: Felix von Leitner <felix-linuxkernel@fefe.de>
To: Kees Cook <keescook@chromium.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Andy Lutomirski <luto@amacapital.net>,
	Will Drewry <wad@chromium.org>
Subject: Re: security problem with seccomp-filter
Date: Sun, 12 Apr 2015 23:33:12 +0200	[thread overview]
Message-ID: <20150412213311.GA16854@qarx.de> (raw)
In-Reply-To: <CAGXu5jKNKQvDk_V3bSa6Bp18VTMJkdixk9tUOVGNWaxjsnfqug@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 873 bytes --]

> What you're describing should work correctly (it's part of the
> regression test suite we use). So, given that, I'd love to get to the
> bottom of what you're seeing. Do you have a URL to your code? What
> architecture are you running on?

Well, I must be doing something wrong then.
I extracted a test case from my program.
I put it on http://ptrace.fefe.de/seccompfail.c

It installs three seccomp filters, the last one containing this:

    DISALLOW_SYSCALL(prctl),

with

#define DISALLOW_SYSCALL(name) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)

It is my understanding that that should then kill the process if the
prctl syscall is called again.

I test this by attempting to install the very same seccomp filter again,
which calls prctl, but the process is not killed.

What am I doing wrong?

Thanks,
Felix

[-- Attachment #2: seccompfail.c --]
[-- Type: text/x-csrc, Size: 6334 bytes --]

#include <stddef.h>
#include <features.h>
#include <inttypes.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>
#include <sys/poll.h>
#include <unistd.h>
#include <time.h>
#include <netdb.h>
#include <alloca.h>
#include <signal.h>
#include <errno.h>

#include <sys/prctl.h>
#include <linux/unistd.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
# define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
# define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
struct seccomp_data {
    int nr;
    __u32 arch;
    __u64 instruction_pointer;
    __u64 args[6];
};
#endif
#ifndef SYS_SECCOMP
# define SYS_SECCOMP 1
#endif

#define syscall_nr (offsetof(struct seccomp_data, nr))

#if defined(__i386__)
# define REG_SYSCALL	REG_EAX
# define ARCH_NR	AUDIT_ARCH_I386
#elif defined(__x86_64__)
# define REG_SYSCALL	REG_RAX
# define ARCH_NR	AUDIT_ARCH_X86_64
#else
# error "Platform does not support seccomp filter yet"
#endif

#define ALLOW_SYSCALL(name) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

static int install_syscall_filter(void) {
  /* Linux allows a process to restrict itself (and potential children)
   * in what syscalls can be issued.  The mechanism is called
   * seccomp-filter or "seccomp mode 2".  It works by reusing the
   * Berkeley Packet Filter, which is meant for PCAP-style packet
   * filtering expressions like "only TCP packets, please".  But it is
   * really a bytecode that has to be passed inside an array, and each
   * instruction is constructed using scary looking macros.  The basics
   * are not so bad, however.  We have two registers, one accumulator
   * and one index register (which is not used in this part of the
   * code), and instead of a network packet we are operating on a
   * certain struct with the syscall info, which is called seccomp_data
   * (reproduced above). */
  struct sock_filter filter[] = {
    /* validate architecture to avoid x32-on-x86_64 syscall aliasing shenanigans */

    /* BPF_LD = load, BPF_W = word, BPF_ABS = absolute offset */
    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, arch)),
    /* BPF_JMP+BPF_JEQ+BPF_K = compare accumulator to constant (in our
     * case, ARCH_NR), and skip the next instruction if equal */
    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0),
    /* "return SECCOMP_RET_KILL", tell seccomp to kill the process */
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),

    /* load the syscall number */
    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),

    /* and now a list of allowed syscalls */
    ALLOW_SYSCALL(rt_sigreturn),
#ifdef __NR_sigreturn
    ALLOW_SYSCALL(sigreturn),
#endif
    ALLOW_SYSCALL(exit_group),
    ALLOW_SYSCALL(exit),

#ifdef __NR_socketcall
    ALLOW_SYSCALL(socketcall),
#else
    ALLOW_SYSCALL(socket),
    ALLOW_SYSCALL(sendto),
    ALLOW_SYSCALL(recvfrom),
#endif

    ALLOW_SYSCALL(poll),

    /* so we can further restrict allowed syscalls */
    ALLOW_SYSCALL(prctl),

    /* so gethostbyname can open /etc/resolv.conf */
    ALLOW_SYSCALL(open),
    ALLOW_SYSCALL(read),
    ALLOW_SYSCALL(mmap),
    ALLOW_SYSCALL(mmap2),
    ALLOW_SYSCALL(munmap),
    ALLOW_SYSCALL(lseek),
    ALLOW_SYSCALL(_llseek),
    ALLOW_SYSCALL(close),

    /* for our time keeping */
    ALLOW_SYSCALL(gettimeofday),	// x86_64 uses a vsyscall for this, so this filter will never trigger

    /* for when buffer writes the output; since we only write to stdout, filter for fd==1 */
    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_write, 0, 4),
    /* it's write(2).  Load first argument into accumulator */
    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, args[0])),
    /* if it's 1 (stdout), skip 1 instruction */
    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 1, 1, 0),
    /* "return SECCOMP_RET_KILL", tell seccomp to kill the process */
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
    /* "return SECCOMP_RET_ALLOW", tell seccomp to allow the syscall */
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),

    /* if none of these syscalls matched, kill the process */
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
  };
  struct sock_fprog prog = {
    .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
    .filter = filter
  };

  /* see linux/Documentation/prctl/no_new_privs.txt */
  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
    /* if this fails, we are running on an ancient kernel without
     * seccomp support; nothing we can do about it, really. */
    return -1;
  }

  /* see linux/Documentation/prctl/seccomp_filter.txt */
  if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
    /* if this happens, we are running on a kernel without seccomp
     * filters support; nothing we can do about it, really. */
    return -1;
  }
  return 0;
}

#define DISALLOW_SYSCALL(name) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)

static int seccomp_denyfile() {
  struct sock_filter filter[] = {
    DISALLOW_SYSCALL(open),
    DISALLOW_SYSCALL(mmap),
    DISALLOW_SYSCALL(mmap2),
    DISALLOW_SYSCALL(munmap),
    DISALLOW_SYSCALL(lseek),
    DISALLOW_SYSCALL(_llseek),
    DISALLOW_SYSCALL(close),
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
  };
  struct sock_fprog prog = {
    .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
    .filter = filter
  };
  return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}

static int seccomp_denysocket() {
  struct sock_filter filter[] = {
#ifndef __NR_socketcall
    DISALLOW_SYSCALL(setsockopt),
    DISALLOW_SYSCALL(socket),
#endif
    DISALLOW_SYSCALL(prctl),
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
  };
  struct sock_fprog prog = {
    .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
    .filter = filter
  };
  return prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
}

int main(int argc,char* argv[]) {
  /* If it fails, the kernel does not support seccomp filter.
   * We'll just continue */
  install_syscall_filter();

  seccomp_denyfile();

  seccomp_denysocket();
  seccomp_denysocket();

  return 0;
}

  reply	other threads:[~2015-04-12 21:33 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-27  5:56 security problem with seccomp-filter Felix von Leitner
2015-03-27  6:39 ` Richard Weinberger
2015-03-28 13:03   ` Kees Cook
2015-04-12 21:33     ` Felix von Leitner [this message]
2015-04-13 17:30       ` Kees Cook

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150412213311.GA16854@qarx.de \
    --to=felix-linuxkernel@fefe.de \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=wad@chromium.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.