All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] seccomp: adding a second whitelist
@ 2013-08-29  1:04 Eduardo Otubo
  2013-08-29  8:34 ` Stefan Hajnoczi
  2013-08-29 12:56 ` Paul Moore
  0 siblings, 2 replies; 19+ messages in thread
From: Eduardo Otubo @ 2013-08-29  1:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: pmoore, coreyb, wad, Eduardo Otubo

Now there's a second whitelist, right before the vcpu starts. The second
whitelist is the same as the first one, except for exec() and select().

Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
---
The second whitelist is installed right before the vcpu starts, it contains all
the system calls the first one has except for exec() and select(), which are
big major syscalls that I could extensively test with virt-test and do not
cause any damage to the general execution.

The environment in which the second whitelist is installed seems to need less
system calls than the first, so the procedure here will be the same: Keep
testing with virt-test and get to the smallest list as possible.

 include/sysemu/seccomp.h |   5 +-
 qemu-seccomp.c           | 243 +++++++++++++++++++++++++++++++++++++++++++++--
 vl.c                     |   8 +-
 3 files changed, 244 insertions(+), 12 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 1189fa2..d1a520d 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -15,8 +15,11 @@
 #ifndef QEMU_SECCOMP_H
 #define QEMU_SECCOMP_H
 
+#define WHITELIST1 0
+#define WHITELIST2 1
+
 #include <seccomp.h>
 #include "qemu/osdep.h"
 
-int seccomp_start(void);
+int seccomp_start(int state);
 #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 37d38f8..7805ca2 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -21,7 +21,7 @@ struct QemuSeccompSyscall {
     uint8_t priority;
 };
 
-static const struct QemuSeccompSyscall seccomp_whitelist[] = {
+static const struct QemuSeccompSyscall whitelist1[] = {
     { SCMP_SYS(timer_settime), 255 },
     { SCMP_SYS(timer_gettime), 254 },
     { SCMP_SYS(futex), 253 },
@@ -221,27 +221,250 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
     { SCMP_SYS(arch_prctl), 240 }
 };
 
-int seccomp_start(void)
+static const struct QemuSeccompSyscall whitelist2[] = {
+    { SCMP_SYS(timer_settime), 255 },
+    { SCMP_SYS(timer_gettime), 254 },
+    { SCMP_SYS(futex), 253 },
+    { SCMP_SYS(recvfrom), 251 },
+    { SCMP_SYS(sendto), 250 },
+    { SCMP_SYS(socketcall), 250 },
+    { SCMP_SYS(read), 249 },
+    { SCMP_SYS(io_submit), 249 },
+    { SCMP_SYS(brk), 248 },
+    { SCMP_SYS(clone), 247 },
+    { SCMP_SYS(mmap), 247 },
+    { SCMP_SYS(mprotect), 246 },
+    { SCMP_SYS(open), 245 },
+    { SCMP_SYS(ioctl), 245 },
+    { SCMP_SYS(socket), 245 },
+    { SCMP_SYS(setsockopt), 245 },
+    { SCMP_SYS(recvmsg), 245 },
+    { SCMP_SYS(sendmsg), 245 },
+    { SCMP_SYS(accept), 245 },
+    { SCMP_SYS(connect), 245 },
+    { SCMP_SYS(socketpair), 245 },
+    { SCMP_SYS(bind), 245 },
+    { SCMP_SYS(listen), 245 },
+    { SCMP_SYS(semget), 245 },
+    { SCMP_SYS(ipc), 245 },
+    { SCMP_SYS(gettimeofday), 245 },
+    { SCMP_SYS(readlink), 245 },
+    { SCMP_SYS(access), 245 },
+    { SCMP_SYS(prctl), 245 },
+    { SCMP_SYS(signalfd), 245 },
+    { SCMP_SYS(getrlimit), 245 },
+    { SCMP_SYS(set_tid_address), 245 },
+    { SCMP_SYS(statfs), 245 },
+    { SCMP_SYS(unlink), 245 },
+    { SCMP_SYS(wait4), 245 },
+    { SCMP_SYS(fcntl64), 245 },
+    { SCMP_SYS(fstat64), 245 },
+    { SCMP_SYS(stat64), 245 },
+    { SCMP_SYS(getgid32), 245 },
+    { SCMP_SYS(getegid32), 245 },
+    { SCMP_SYS(getuid32), 245 },
+    { SCMP_SYS(geteuid32), 245 },
+    { SCMP_SYS(sigreturn), 245 },
+    { SCMP_SYS(_newselect), 245 },
+    { SCMP_SYS(_llseek), 245 },
+    { SCMP_SYS(mmap2), 245 },
+    { SCMP_SYS(sigprocmask), 245 },
+    { SCMP_SYS(sched_getparam), 245 },
+    { SCMP_SYS(sched_getscheduler), 245 },
+    { SCMP_SYS(fstat), 245 },
+    { SCMP_SYS(clock_getres), 245 },
+    { SCMP_SYS(sched_get_priority_min), 245 },
+    { SCMP_SYS(sched_get_priority_max), 245 },
+    { SCMP_SYS(stat), 245 },
+    { SCMP_SYS(uname), 245 },
+    { SCMP_SYS(eventfd2), 245 },
+    { SCMP_SYS(io_getevents), 245 },
+    { SCMP_SYS(dup), 245 },
+    { SCMP_SYS(dup2), 245 },
+    { SCMP_SYS(dup3), 245 },
+    { SCMP_SYS(gettid), 245 },
+    { SCMP_SYS(getgid), 245 },
+    { SCMP_SYS(getegid), 245 },
+    { SCMP_SYS(getuid), 245 },
+    { SCMP_SYS(geteuid), 245 },
+    { SCMP_SYS(timer_create), 245 },
+    { SCMP_SYS(exit), 245 },
+    { SCMP_SYS(clock_gettime), 245 },
+    { SCMP_SYS(time), 245 },
+    { SCMP_SYS(restart_syscall), 245 },
+    { SCMP_SYS(pwrite64), 245 },
+    { SCMP_SYS(nanosleep), 245 },
+    { SCMP_SYS(chown), 245 },
+    { SCMP_SYS(openat), 245 },
+    { SCMP_SYS(getdents), 245 },
+    { SCMP_SYS(timer_delete), 245 },
+    { SCMP_SYS(exit_group), 245 },
+    { SCMP_SYS(rt_sigreturn), 245 },
+    { SCMP_SYS(sync), 245 },
+    { SCMP_SYS(pread64), 245 },
+    { SCMP_SYS(madvise), 245 },
+    { SCMP_SYS(set_robust_list), 245 },
+    { SCMP_SYS(lseek), 245 },
+    { SCMP_SYS(pselect6), 245 },
+    { SCMP_SYS(fork), 245 },
+    { SCMP_SYS(rt_sigprocmask), 245 },
+    { SCMP_SYS(write), 244 },
+    { SCMP_SYS(fcntl), 243 },
+    { SCMP_SYS(tgkill), 242 },
+    { SCMP_SYS(rt_sigaction), 242 },
+    { SCMP_SYS(pipe2), 242 },
+    { SCMP_SYS(munmap), 242 },
+    { SCMP_SYS(mremap), 242 },
+    { SCMP_SYS(fdatasync), 242 },
+    { SCMP_SYS(close), 242 },
+    { SCMP_SYS(rt_sigpending), 242 },
+    { SCMP_SYS(rt_sigtimedwait), 242 },
+    { SCMP_SYS(readv), 242 },
+    { SCMP_SYS(writev), 242 },
+    { SCMP_SYS(preadv), 242 },
+    { SCMP_SYS(pwritev), 242 },
+    { SCMP_SYS(setrlimit), 242 },
+    { SCMP_SYS(ftruncate), 242 },
+    { SCMP_SYS(lstat), 242 },
+    { SCMP_SYS(pipe), 242 },
+    { SCMP_SYS(umask), 242 },
+    { SCMP_SYS(chdir), 242 },
+    { SCMP_SYS(setitimer), 242 },
+    { SCMP_SYS(setsid), 242 },
+    { SCMP_SYS(poll), 242 },
+    { SCMP_SYS(epoll_create), 242 },
+    { SCMP_SYS(epoll_ctl), 242 },
+    { SCMP_SYS(epoll_wait), 242 },
+    { SCMP_SYS(waitpid), 242 },
+    { SCMP_SYS(getsockname), 242 },
+    { SCMP_SYS(getpeername), 242 },
+    { SCMP_SYS(accept4), 242 },
+    { SCMP_SYS(newfstatat), 241 },
+    { SCMP_SYS(shutdown), 241 },
+    { SCMP_SYS(getsockopt), 241 },
+    { SCMP_SYS(semop), 241 },
+    { SCMP_SYS(semtimedop), 241 },
+    { SCMP_SYS(epoll_ctl_old), 241 },
+    { SCMP_SYS(epoll_wait_old), 241 },
+    { SCMP_SYS(epoll_pwait), 241 },
+    { SCMP_SYS(epoll_create1), 241 },
+    { SCMP_SYS(ppoll), 241 },
+    { SCMP_SYS(creat), 241 },
+    { SCMP_SYS(link), 241 },
+    { SCMP_SYS(getpid), 241 },
+    { SCMP_SYS(getppid), 241 },
+    { SCMP_SYS(getpgrp), 241 },
+    { SCMP_SYS(getpgid), 241 },
+    { SCMP_SYS(getsid), 241 },
+    { SCMP_SYS(getdents64), 241 },
+    { SCMP_SYS(getresuid), 241 },
+    { SCMP_SYS(getresgid), 241 },
+    { SCMP_SYS(getgroups), 241 },
+    { SCMP_SYS(getresuid32), 241 },
+    { SCMP_SYS(getresgid32), 241 },
+    { SCMP_SYS(getgroups32), 241 },
+    { SCMP_SYS(signal), 241 },
+    { SCMP_SYS(sigaction), 241 },
+    { SCMP_SYS(sigsuspend), 241 },
+    { SCMP_SYS(sigpending), 241 },
+    { SCMP_SYS(truncate64), 241 },
+    { SCMP_SYS(ftruncate64), 241 },
+    { SCMP_SYS(fchown32), 241 },
+    { SCMP_SYS(chown32), 241 },
+    { SCMP_SYS(lchown32), 241 },
+    { SCMP_SYS(statfs64), 241 },
+    { SCMP_SYS(fstatfs64), 241 },
+    { SCMP_SYS(fstatat64), 241 },
+    { SCMP_SYS(lstat64), 241 },
+    { SCMP_SYS(sendfile64), 241 },
+    { SCMP_SYS(ugetrlimit), 241 },
+    { SCMP_SYS(alarm), 241 },
+    { SCMP_SYS(rt_sigsuspend), 241 },
+    { SCMP_SYS(rt_sigqueueinfo), 241 },
+    { SCMP_SYS(rt_tgsigqueueinfo), 241 },
+    { SCMP_SYS(sigaltstack), 241 },
+    { SCMP_SYS(signalfd4), 241 },
+    { SCMP_SYS(truncate), 241 },
+    { SCMP_SYS(fchown), 241 },
+    { SCMP_SYS(lchown), 241 },
+    { SCMP_SYS(fchownat), 241 },
+    { SCMP_SYS(fstatfs), 241 },
+    { SCMP_SYS(getitimer), 241 },
+    { SCMP_SYS(syncfs), 241 },
+    { SCMP_SYS(fsync), 241 },
+    { SCMP_SYS(fchdir), 241 },
+    { SCMP_SYS(msync), 241 },
+    { SCMP_SYS(sched_setparam), 241 },
+    { SCMP_SYS(sched_setscheduler), 241 },
+    { SCMP_SYS(sched_yield), 241 },
+    { SCMP_SYS(sched_rr_get_interval), 241 },
+    { SCMP_SYS(sched_setaffinity), 241 },
+    { SCMP_SYS(sched_getaffinity), 241 },
+    { SCMP_SYS(readahead), 241 },
+    { SCMP_SYS(timer_getoverrun), 241 },
+    { SCMP_SYS(unlinkat), 241 },
+    { SCMP_SYS(readlinkat), 241 },
+    { SCMP_SYS(faccessat), 241 },
+    { SCMP_SYS(get_robust_list), 241 },
+    { SCMP_SYS(splice), 241 },
+    { SCMP_SYS(vmsplice), 241 },
+    { SCMP_SYS(getcpu), 241 },
+    { SCMP_SYS(sendmmsg), 241 },
+    { SCMP_SYS(recvmmsg), 241 },
+    { SCMP_SYS(prlimit64), 241 },
+    { SCMP_SYS(waitid), 241 },
+    { SCMP_SYS(io_cancel), 241 },
+    { SCMP_SYS(io_setup), 241 },
+    { SCMP_SYS(io_destroy), 241 },
+    { SCMP_SYS(arch_prctl), 240 }
+};
+
+static int process_list(scmp_filter_ctx *ctx,
+                        const struct QemuSeccompSyscall *list,
+                        unsigned int list_size)
 {
     int rc = 0;
     unsigned int i = 0;
-    scmp_filter_ctx ctx;
 
+    for (i = 0; i < list_size; i++) {
+        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, list[i].num, 0);
+        if (rc < 0) {
+            goto seccomp_return;
+        }
+
+        rc = seccomp_syscall_priority(ctx, list[i].num,
+                                      list[i].priority);
+        if (rc < 0) {
+            goto seccomp_return;
+        }
+    }
+
+seccomp_return:
+    return rc;
+}
+
+int seccomp_start(int state)
+{
+    int rc = 0;
+    scmp_filter_ctx ctx;
     ctx = seccomp_init(SCMP_ACT_KILL);
     if (ctx == NULL) {
         goto seccomp_return;
     }
 
-    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
-        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
-        if (rc < 0) {
-            goto seccomp_return;
-        }
-        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
-                                      seccomp_whitelist[i].priority);
+    switch (state) {
+    case WHITELIST1:
+        rc = process_list(ctx, whitelist1, ARRAY_SIZE(whitelist1));
         if (rc < 0) {
             goto seccomp_return;
         }
+        break;
+    case WHITELIST2:
+        rc = process_list(ctx, whitelist2, ARRAY_SIZE(whitelist2));
+        break;
+    default:
+        rc = -1;
+        goto seccomp_return;
     }
 
     rc = seccomp_load(ctx);
diff --git a/vl.c b/vl.c
index dfbc071..6f562ae 100644
--- a/vl.c
+++ b/vl.c
@@ -1033,7 +1033,7 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
     /* FIXME: change this to true for 1.3 */
     if (qemu_opt_get_bool(opts, "enable", false)) {
 #ifdef CONFIG_SECCOMP
-        if (seccomp_start() < 0) {
+        if (seccomp_start(WHITELIST1) < 0) {
             qerror_report(ERROR_CLASS_GENERIC_ERROR,
                           "failed to install seccomp syscall filter in the kernel");
             return -1;
@@ -1772,6 +1772,12 @@ void vm_start(void)
         runstate_set(RUN_STATE_RUNNING);
         vm_state_notify(1, RUN_STATE_RUNNING);
         resume_all_vcpus();
+#ifdef CONFIG_SECCOMP
+        if (seccomp_start(WHITELIST2) < 0) {
+            qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                          "failed to install seccomp syscall filter in the kernel");
+        }
+#endif
         monitor_protocol_event(QEVENT_RESUME, NULL);
     }
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29  1:04 [Qemu-devel] [PATCH] seccomp: adding a second whitelist Eduardo Otubo
@ 2013-08-29  8:34 ` Stefan Hajnoczi
  2013-08-29  8:56   ` Paolo Bonzini
  2013-08-30 14:21   ` Eduardo Otubo
  2013-08-29 12:56 ` Paul Moore
  1 sibling, 2 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-08-29  8:34 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, coreyb, wad, qemu-devel

On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
> Now there's a second whitelist, right before the vcpu starts. The second
> whitelist is the same as the first one, except for exec() and select().

-netdev tap,downscript=/path/to/script requires exec() in the QEMU
shutdown code path.  Will this work with seccomp?

Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29  8:34 ` Stefan Hajnoczi
@ 2013-08-29  8:56   ` Paolo Bonzini
  2013-08-30 14:22     ` Eduardo Otubo
  2013-08-30 14:21   ` Eduardo Otubo
  1 sibling, 1 reply; 19+ messages in thread
From: Paolo Bonzini @ 2013-08-29  8:56 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pmoore, coreyb, wad, qemu-devel, Eduardo Otubo

Il 29/08/2013 10:34, Stefan Hajnoczi ha scritto:
> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>> Now there's a second whitelist, right before the vcpu starts. The second
>> whitelist is the same as the first one, except for exec() and select().
> 
> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> shutdown code path.  Will this work with seccomp?

It won't by design (seccomp is supposed to run with file descriptor
passing).

However, removing select() seems a bit risky.  We cannot exclude that
external libraries are not using it instead of, say, poll.

BTW, recent QEMU is using ppoll instead of poll; does the whitelist
require an update?

Paolo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29  1:04 [Qemu-devel] [PATCH] seccomp: adding a second whitelist Eduardo Otubo
  2013-08-29  8:34 ` Stefan Hajnoczi
@ 2013-08-29 12:56 ` Paul Moore
  2013-08-30 14:27   ` Eduardo Otubo
  1 sibling, 1 reply; 19+ messages in thread
From: Paul Moore @ 2013-08-29 12:56 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: coreyb, wad, qemu-devel

On Wednesday, August 28, 2013 10:04:32 PM Eduardo Otubo wrote:
> Now there's a second whitelist, right before the vcpu starts. The second
> whitelist is the same as the first one, except for exec() and select().
> 
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>

We talked about this in a previous thread, but as a reminder, the kernel's 
seccomp BPF filter works by executing all of the loaded filters for each 
syscall and taking the least permissive action for all of the results.  In 
other words, if one filter returns ALLOW for a given syscall and another 
filter returns KILL, the kernel will select the KILL action for the syscall.

With that in mind, I think the best option is to keep the existing whitelist 
and instead of creating a second whitelist, create a second *blacklist* that 
removes the syscalls you don't want to allow anymore, e.g. exec() and 
select().  This approach should be easier to maintain and would result in less 
overhead in the kernel's seccomp evaluator (the blacklist filter would be much 
smaller than a second whitelist filter).

-Paul

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29  8:34 ` Stefan Hajnoczi
  2013-08-29  8:56   ` Paolo Bonzini
@ 2013-08-30 14:21   ` Eduardo Otubo
  2013-08-30 15:23     ` Stefan Hajnoczi
  2013-09-03 18:02     ` Corey Bryant
  1 sibling, 2 replies; 19+ messages in thread
From: Eduardo Otubo @ 2013-08-30 14:21 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: pmoore, coreyb, wad, qemu-devel



On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>> Now there's a second whitelist, right before the vcpu starts. The second
>> whitelist is the same as the first one, except for exec() and select().
>
> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> shutdown code path.  Will this work with seccomp?

I actually don't know, but I'll test that as well. Can you run a test 
with this patch and -netdev? I mean, if you're pointing that out you 
might have a scenario already setup, right?

Thanks!

>
> Stefan
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29  8:56   ` Paolo Bonzini
@ 2013-08-30 14:22     ` Eduardo Otubo
  0 siblings, 0 replies; 19+ messages in thread
From: Eduardo Otubo @ 2013-08-30 14:22 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: pmoore, Stefan Hajnoczi, coreyb, wad, qemu-devel



On 08/29/2013 05:56 AM, Paolo Bonzini wrote:
> Il 29/08/2013 10:34, Stefan Hajnoczi ha scritto:
>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>> Now there's a second whitelist, right before the vcpu starts. The second
>>> whitelist is the same as the first one, except for exec() and select().
>>
>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>> shutdown code path.  Will this work with seccomp?
>
> It won't by design (seccomp is supposed to run with file descriptor
> passing).
>
> However, removing select() seems a bit risky.  We cannot exclude that
> external libraries are not using it instead of, say, poll.
>
> BTW, recent QEMU is using ppoll instead of poll; does the whitelist
> require an update?

It might need some update, yes. I'll run some other tests with this 
specific syscall and, if needed, I'll send another patch for the 
whitelist update.

Thanks for pointing that, Paolo.

>
> Paolo
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-29 12:56 ` Paul Moore
@ 2013-08-30 14:27   ` Eduardo Otubo
  2013-08-30 15:32     ` Paul Moore
  0 siblings, 1 reply; 19+ messages in thread
From: Eduardo Otubo @ 2013-08-30 14:27 UTC (permalink / raw)
  To: Paul Moore; +Cc: coreyb, wad, qemu-devel



On 08/29/2013 09:56 AM, Paul Moore wrote:
> On Wednesday, August 28, 2013 10:04:32 PM Eduardo Otubo wrote:
>> Now there's a second whitelist, right before the vcpu starts. The second
>> whitelist is the same as the first one, except for exec() and select().
>>
>> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
>
> We talked about this in a previous thread, but as a reminder, the kernel's
> seccomp BPF filter works by executing all of the loaded filters for each
> syscall and taking the least permissive action for all of the results.  In
> other words, if one filter returns ALLOW for a given syscall and another
> filter returns KILL, the kernel will select the KILL action for the syscall.
>
> With that in mind, I think the best option is to keep the existing whitelist
> and instead of creating a second whitelist, create a second *blacklist* that
> removes the syscalls you don't want to allow anymore, e.g. exec() and
> select().  This approach should be easier to maintain and would result in less
> overhead in the kernel's seccomp evaluator (the blacklist filter would be much
> smaller than a second whitelist filter).

You're correct. I was thinking in a whole other approach, but your point 
makes a lot more sense. As I mentioned on the IRC, I should call 
seccomp_init(SCMP_ACT_ALLOW) and seccomp_rule_add(ctx, SCMP_ACT_KILL, 
list[i].num, 0); is that correct?

Thanks,

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-30 14:21   ` Eduardo Otubo
@ 2013-08-30 15:23     ` Stefan Hajnoczi
  2013-08-30 15:42       ` Paul Moore
  2013-09-03 18:02     ` Corey Bryant
  1 sibling, 1 reply; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-08-30 15:23 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: Paul Moore, Corey Bryant, wad, qemu-devel

On Fri, Aug 30, 2013 at 4:21 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> wrote:
> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>
>>> Now there's a second whitelist, right before the vcpu starts. The second
>>> whitelist is the same as the first one, except for exec() and select().
>>
>>
>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>> shutdown code path.  Will this work with seccomp?
>
>
> I actually don't know, but I'll test that as well. Can you run a test with
> this patch and -netdev? I mean, if you're pointing that out you might have a
> scenario already setup, right?

I'm not having much luck running qemu.git/master with CONFIG_SECCOMP
on Fedora 19.  The GTK UI opens but I don't see the guest's display.

$ x86_64-softmmu/qemu-system-x86_64
[...GTK UI opens but QEMU is hung...]

strace shows the process is hung somehow and ps says it's <defunct>
although it never exited.

$ sudo cat /proc/5912/stack
[<ffffffff81061fda>] do_exit+0x6ca/0xa20
[<ffffffff810ef090>] __secure_computing+0xe0/0x240
[<ffffffff8101d722>] syscall_trace_enter+0x172/0x230
[<ffffffff816478c8>] tracesys+0x7e/0xe2
[<ffffffffffffffff>] 0xffffffffffffffff

Okay, so seccomp killed the process.

$ sudo cat /proc/5912/syscall
29 0x0 0x1000 0x380 0x7fffbeb49380 0x0 0x0 0x7fffbeb495b8 0x7f6b72402657

$ git grep '\<29\>' arch/x86/include/generated/uapi/asm/unistd_64.h
#define __NR_shmget 29

Now it needs syscall 30.  I guess the whitelist is only designed for a
specific invocation that you are testing?

BTW, I noticed a bug in your patch: WHITELIST1 is only enforced when
the sandbox enable=on option is set.  But after your patch WHITELIST2
is applied unconditionally - it should also be controlled by the
command-line option.

Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-30 14:27   ` Eduardo Otubo
@ 2013-08-30 15:32     ` Paul Moore
  0 siblings, 0 replies; 19+ messages in thread
From: Paul Moore @ 2013-08-30 15:32 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: coreyb, wad, qemu-devel

On Friday, August 30, 2013 11:27:28 AM Eduardo Otubo wrote:
> On 08/29/2013 09:56 AM, Paul Moore wrote:
> > On Wednesday, August 28, 2013 10:04:32 PM Eduardo Otubo wrote:
> >> Now there's a second whitelist, right before the vcpu starts. The second
> >> whitelist is the same as the first one, except for exec() and select().
> >> 
> >> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> > 
> > We talked about this in a previous thread, but as a reminder, the kernel's
> > seccomp BPF filter works by executing all of the loaded filters for each
> > syscall and taking the least permissive action for all of the results.  In
> > other words, if one filter returns ALLOW for a given syscall and another
> > filter returns KILL, the kernel will select the KILL action for the
> > syscall.
> > 
> > With that in mind, I think the best option is to keep the existing
> > whitelist and instead of creating a second whitelist, create a second
> > *blacklist* that removes the syscalls you don't want to allow anymore,
> > e.g. exec() and select().  This approach should be easier to maintain and
> > would result in less overhead in the kernel's seccomp evaluator (the
> > blacklist filter would be much smaller than a second whitelist filter).
> 
> You're correct. I was thinking in a whole other approach, but your point
> makes a lot more sense. As I mentioned on the IRC, I should call
> seccomp_init(SCMP_ACT_ALLOW) and seccomp_rule_add(ctx, SCMP_ACT_KILL,
> list[i].num, 0); is that correct?

Yes, just basically swap the actions.

Also, as an FYI, while I may be in the IRC room, I typically don't actually 
monitor the room unless you direct a comment at me (it starts blinking and 
grabs my attention).

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-30 15:23     ` Stefan Hajnoczi
@ 2013-08-30 15:42       ` Paul Moore
  2013-09-02  9:05         ` Stefan Hajnoczi
  0 siblings, 1 reply; 19+ messages in thread
From: Paul Moore @ 2013-08-30 15:42 UTC (permalink / raw)
  To: Stefan Hajnoczi; +Cc: Corey Bryant, wad, qemu-devel, Eduardo Otubo

On Friday, August 30, 2013 05:23:45 PM Stefan Hajnoczi wrote:
> On Fri, Aug 30, 2013 at 4:21 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> 
wrote:
> > On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
> >> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
> >>> Now there's a second whitelist, right before the vcpu starts. The second
> >>> whitelist is the same as the first one, except for exec() and select().
> >> 
> >> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> >> shutdown code path.  Will this work with seccomp?
> > 
> > I actually don't know, but I'll test that as well. Can you run a test with
> > this patch and -netdev? I mean, if you're pointing that out you might have
> > a scenario already setup, right?
> 
> I'm not having much luck running qemu.git/master with CONFIG_SECCOMP
> on Fedora 19.  The GTK UI opens but I don't see the guest's display.
> 
> $ x86_64-softmmu/qemu-system-x86_64
> [...GTK UI opens but QEMU is hung...]
> 
> strace shows the process is hung somehow and ps says it's <defunct>
> although it never exited.
> 
> $ sudo cat /proc/5912/stack
> [<ffffffff81061fda>] do_exit+0x6ca/0xa20
> [<ffffffff810ef090>] __secure_computing+0xe0/0x240
> [<ffffffff8101d722>] syscall_trace_enter+0x172/0x230
> [<ffffffff816478c8>] tracesys+0x7e/0xe2
> [<ffffffffffffffff>] 0xffffffffffffffff
> 
> Okay, so seccomp killed the process.
> 
> $ sudo cat /proc/5912/syscall
> 29 0x0 0x1000 0x380 0x7fffbeb49380 0x0 0x0 0x7fffbeb495b8 0x7f6b72402657
> 
> $ git grep '\<29\>' arch/x86/include/generated/uapi/asm/unistd_64.h
> #define __NR_shmget 29
> 
> Now it needs syscall 30.  I guess the whitelist is only designed for a
> specific invocation that you are testing?

For future reference, it doesn't need to be that hard to identify when seccomp 
has killed a process.  If you're running audit go ahead and check the audit 
log:

 # ausearch -m SECCOMP
 ----
 time->Fri Aug 30 11:37:46 2013
 type=SECCOMP msg=audit(1377877066.414:64): auid=0 uid=0 gid=0 ses=1
 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 pid=3787
 comm="20-live-basic_d" sig=31 syscall=2 compat=0 ip=0x3a27ae6570 code=0x0

... and notice the 'syscall' field which in this case happens to be '2'.  If 
you have the 'scmp_sys_resolver' tool installed on your system (libseccomp-
devel >= 2.1.0 on Fedora) you can then resolve the syscall number:

 # scmp_sys_resolver 2
 open

It is also worth mentioning that while scmp_sys_resolver resolves syscalls for 
the native architecture by default, it can resolve for any of the 
architectures that libseccomp supports, see the manpage for details 
(currently: x86, x86_64, x32, and arm).

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-30 15:42       ` Paul Moore
@ 2013-09-02  9:05         ` Stefan Hajnoczi
  0 siblings, 0 replies; 19+ messages in thread
From: Stefan Hajnoczi @ 2013-09-02  9:05 UTC (permalink / raw)
  To: Paul Moore; +Cc: Corey Bryant, wad, qemu-devel, Eduardo Otubo

On Fri, Aug 30, 2013 at 11:42:34AM -0400, Paul Moore wrote:
> On Friday, August 30, 2013 05:23:45 PM Stefan Hajnoczi wrote:
> > On Fri, Aug 30, 2013 at 4:21 PM, Eduardo Otubo <otubo@linux.vnet.ibm.com> 
> wrote:
> > > On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
> > >> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
> > >>> Now there's a second whitelist, right before the vcpu starts. The second
> > >>> whitelist is the same as the first one, except for exec() and select().
> > >> 
> > >> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> > >> shutdown code path.  Will this work with seccomp?
> > > 
> > > I actually don't know, but I'll test that as well. Can you run a test with
> > > this patch and -netdev? I mean, if you're pointing that out you might have
> > > a scenario already setup, right?
> > 
> > I'm not having much luck running qemu.git/master with CONFIG_SECCOMP
> > on Fedora 19.  The GTK UI opens but I don't see the guest's display.
> > 
> > $ x86_64-softmmu/qemu-system-x86_64
> > [...GTK UI opens but QEMU is hung...]
> > 
> > strace shows the process is hung somehow and ps says it's <defunct>
> > although it never exited.
> > 
> > $ sudo cat /proc/5912/stack
> > [<ffffffff81061fda>] do_exit+0x6ca/0xa20
> > [<ffffffff810ef090>] __secure_computing+0xe0/0x240
> > [<ffffffff8101d722>] syscall_trace_enter+0x172/0x230
> > [<ffffffff816478c8>] tracesys+0x7e/0xe2
> > [<ffffffffffffffff>] 0xffffffffffffffff
> > 
> > Okay, so seccomp killed the process.
> > 
> > $ sudo cat /proc/5912/syscall
> > 29 0x0 0x1000 0x380 0x7fffbeb49380 0x0 0x0 0x7fffbeb495b8 0x7f6b72402657
> > 
> > $ git grep '\<29\>' arch/x86/include/generated/uapi/asm/unistd_64.h
> > #define __NR_shmget 29
> > 
> > Now it needs syscall 30.  I guess the whitelist is only designed for a
> > specific invocation that you are testing?
> 
> For future reference, it doesn't need to be that hard to identify when seccomp 
> has killed a process.  If you're running audit go ahead and check the audit 
> log:
> 
>  # ausearch -m SECCOMP
>  ----
>  time->Fri Aug 30 11:37:46 2013
>  type=SECCOMP msg=audit(1377877066.414:64): auid=0 uid=0 gid=0 ses=1
>  subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 pid=3787
>  comm="20-live-basic_d" sig=31 syscall=2 compat=0 ip=0x3a27ae6570 code=0x0
> 
> ... and notice the 'syscall' field which in this case happens to be '2'.  If 
> you have the 'scmp_sys_resolver' tool installed on your system (libseccomp-
> devel >= 2.1.0 on Fedora) you can then resolve the syscall number:
> 
>  # scmp_sys_resolver 2
>  open
> 
> It is also worth mentioning that while scmp_sys_resolver resolves syscalls for 
> the native architecture by default, it can resolve for any of the 
> architectures that libseccomp supports, see the manpage for details 
> (currently: x86, x86_64, x32, and arm).

Useful tips, thanks.

Stefan

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-08-30 14:21   ` Eduardo Otubo
  2013-08-30 15:23     ` Stefan Hajnoczi
@ 2013-09-03 18:02     ` Corey Bryant
  2013-09-03 18:08       ` Corey Bryant
  2013-09-03 20:05       ` Eduardo Otubo
  1 sibling, 2 replies; 19+ messages in thread
From: Corey Bryant @ 2013-09-03 18:02 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, Stefan Hajnoczi, wad, qemu-devel



On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>
>
> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>> Now there's a second whitelist, right before the vcpu starts. The second
>>> whitelist is the same as the first one, except for exec() and select().
>>
>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>> shutdown code path.  Will this work with seccomp?
>
> I actually don't know, but I'll test that as well. Can you run a test
> with this patch and -netdev? I mean, if you're pointing that out you
> might have a scenario already setup, right?
>
> Thanks!
>

This uses exec() in net/tap.c.

I think if we're going to introduce a sandbox environment that restricts 
existing QEMU behavior, then we have to introduce a new argument to the 
-sandbox option.  So for example, "-sandbox on" would continue to use 
the whitelist that allows everything in QEMU to work (or at least it 
should :).  And something like "-sandbox on,strict=on" would use the 
whitelist + blacklist.

If this is acceptable though, then I wonder how we could go about adding 
new syscalls to the blacklist in future QEMU releases without regressing 
"-sandbox on,strict=on".

By the way, are any test buckets running regularly with -sandbox on?

-- 
Regards,
Corey Bryant

>>
>> Stefan
>>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 18:02     ` Corey Bryant
@ 2013-09-03 18:08       ` Corey Bryant
  2013-09-03 18:21         ` Paul Moore
  2013-09-03 20:05       ` Eduardo Otubo
  1 sibling, 1 reply; 19+ messages in thread
From: Corey Bryant @ 2013-09-03 18:08 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, Stefan Hajnoczi, wad, qemu-devel



On 09/03/2013 02:02 PM, Corey Bryant wrote:
>
>
> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>>
>>
>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>> Now there's a second whitelist, right before the vcpu starts. The
>>>> second
>>>> whitelist is the same as the first one, except for exec() and select().
>>>
>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>>> shutdown code path.  Will this work with seccomp?
>>
>> I actually don't know, but I'll test that as well. Can you run a test
>> with this patch and -netdev? I mean, if you're pointing that out you
>> might have a scenario already setup, right?
>>
>> Thanks!
>>
>
> This uses exec() in net/tap.c.
>
> I think if we're going to introduce a sandbox environment that restricts
> existing QEMU behavior, then we have to introduce a new argument to the
> -sandbox option.  So for example, "-sandbox on" would continue to use
> the whitelist that allows everything in QEMU to work (or at least it
> should :).  And something like "-sandbox on,strict=on" would use the
> whitelist + blacklist.
>
> If this is acceptable though, then I wonder how we could go about adding
> new syscalls to the blacklist in future QEMU releases without regressing
> "-sandbox on,strict=on".

Maybe a better approach is to provide support that allows libvirt to 
define the blacklist and pass it to QEMU?

>
> By the way, are any test buckets running regularly with -sandbox on?
>

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 18:08       ` Corey Bryant
@ 2013-09-03 18:21         ` Paul Moore
  2013-09-03 18:23           ` Corey Bryant
  2013-09-03 20:07           ` Eduardo Otubo
  0 siblings, 2 replies; 19+ messages in thread
From: Paul Moore @ 2013-09-03 18:21 UTC (permalink / raw)
  To: Corey Bryant, Eduardo Otubo; +Cc: Stefan Hajnoczi, wad, qemu-devel

On Tuesday, September 03, 2013 02:08:28 PM Corey Bryant wrote:
> On 09/03/2013 02:02 PM, Corey Bryant wrote:
> > On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
> >> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
> >>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
> >>>> Now there's a second whitelist, right before the vcpu starts. The
> >>>> second
> >>>> whitelist is the same as the first one, except for exec() and select().
> >>> 
> >>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> >>> shutdown code path.  Will this work with seccomp?
> >> 
> >> I actually don't know, but I'll test that as well. Can you run a test
> >> with this patch and -netdev? I mean, if you're pointing that out you
> >> might have a scenario already setup, right?
> >> 
> >> Thanks!
> > 
> > This uses exec() in net/tap.c.
> > 
> > I think if we're going to introduce a sandbox environment that restricts
> > existing QEMU behavior, then we have to introduce a new argument to the
> > -sandbox option.  So for example, "-sandbox on" would continue to use
> > the whitelist that allows everything in QEMU to work (or at least it
> > should :).  And something like "-sandbox on,strict=on" would use the
> > whitelist + blacklist.
> > 
> > If this is acceptable though, then I wonder how we could go about adding
> > new syscalls to the blacklist in future QEMU releases without regressing
> > "-sandbox on,strict=on".
> 
> Maybe a better approach is to provide support that allows libvirt to
> define the blacklist and pass it to QEMU?

FYI: I didn't want to mention this until I had some patches ready to post, but 
I'm currently working on adding syscall filtering, via libseccomp, to libvirt.  
I hope to get an initial RFC-quality patch out "soon".

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 18:21         ` Paul Moore
@ 2013-09-03 18:23           ` Corey Bryant
  2013-09-03 20:07           ` Eduardo Otubo
  1 sibling, 0 replies; 19+ messages in thread
From: Corey Bryant @ 2013-09-03 18:23 UTC (permalink / raw)
  To: Paul Moore; +Cc: Stefan Hajnoczi, wad, qemu-devel, Eduardo Otubo



On 09/03/2013 02:21 PM, Paul Moore wrote:
> On Tuesday, September 03, 2013 02:08:28 PM Corey Bryant wrote:
>> On 09/03/2013 02:02 PM, Corey Bryant wrote:
>>> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>>>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>>>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>>>> Now there's a second whitelist, right before the vcpu starts. The
>>>>>> second
>>>>>> whitelist is the same as the first one, except for exec() and select().
>>>>>
>>>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>>>>> shutdown code path.  Will this work with seccomp?
>>>>
>>>> I actually don't know, but I'll test that as well. Can you run a test
>>>> with this patch and -netdev? I mean, if you're pointing that out you
>>>> might have a scenario already setup, right?
>>>>
>>>> Thanks!
>>>
>>> This uses exec() in net/tap.c.
>>>
>>> I think if we're going to introduce a sandbox environment that restricts
>>> existing QEMU behavior, then we have to introduce a new argument to the
>>> -sandbox option.  So for example, "-sandbox on" would continue to use
>>> the whitelist that allows everything in QEMU to work (or at least it
>>> should :).  And something like "-sandbox on,strict=on" would use the
>>> whitelist + blacklist.
>>>
>>> If this is acceptable though, then I wonder how we could go about adding
>>> new syscalls to the blacklist in future QEMU releases without regressing
>>> "-sandbox on,strict=on".
>>
>> Maybe a better approach is to provide support that allows libvirt to
>> define the blacklist and pass it to QEMU?
>
> FYI: I didn't want to mention this until I had some patches ready to post, but
> I'm currently working on adding syscall filtering, via libseccomp, to libvirt.
> I hope to get an initial RFC-quality patch out "soon".
>

Great, looking forward to seeing them.

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 18:02     ` Corey Bryant
  2013-09-03 18:08       ` Corey Bryant
@ 2013-09-03 20:05       ` Eduardo Otubo
  2013-09-03 21:08         ` Corey Bryant
  1 sibling, 1 reply; 19+ messages in thread
From: Eduardo Otubo @ 2013-09-03 20:05 UTC (permalink / raw)
  To: Corey Bryant; +Cc: pmoore, Stefan Hajnoczi, wad, qemu-devel



On 09/03/2013 03:02 PM, Corey Bryant wrote:
>
>
> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>>
>>
>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>> Now there's a second whitelist, right before the vcpu starts. The
>>>> second
>>>> whitelist is the same as the first one, except for exec() and select().
>>>
>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>>> shutdown code path.  Will this work with seccomp?
>>
>> I actually don't know, but I'll test that as well. Can you run a test
>> with this patch and -netdev? I mean, if you're pointing that out you
>> might have a scenario already setup, right?
>>
>> Thanks!
>>
>
> This uses exec() in net/tap.c.
>
> I think if we're going to introduce a sandbox environment that restricts
> existing QEMU behavior, then we have to introduce a new argument to the
> -sandbox option.  So for example, "-sandbox on" would continue to use
> the whitelist that allows everything in QEMU to work (or at least it
> should :).  And something like "-sandbox on,strict=on" would use the
> whitelist + blacklist.

I think tihs is very reasonable. I'll working on implementing this 
options for v2.

>
> If this is acceptable though, then I wonder how we could go about adding
> new syscalls to the blacklist in future QEMU releases without regressing
> "-sandbox on,strict=on".
>
> By the way, are any test buckets running regularly with -sandbox on?

I am running tests with virt-test. Need to come up with a script that 
checks for unused syscalls, though.

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 18:21         ` Paul Moore
  2013-09-03 18:23           ` Corey Bryant
@ 2013-09-03 20:07           ` Eduardo Otubo
  2013-09-03 21:49             ` Paul Moore
  1 sibling, 1 reply; 19+ messages in thread
From: Eduardo Otubo @ 2013-09-03 20:07 UTC (permalink / raw)
  To: Paul Moore; +Cc: Stefan Hajnoczi, Corey Bryant, wad, qemu-devel



On 09/03/2013 03:21 PM, Paul Moore wrote:
> On Tuesday, September 03, 2013 02:08:28 PM Corey Bryant wrote:
>> On 09/03/2013 02:02 PM, Corey Bryant wrote:
>>> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>>>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>>>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>>>> Now there's a second whitelist, right before the vcpu starts. The
>>>>>> second
>>>>>> whitelist is the same as the first one, except for exec() and select().
>>>>>
>>>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>>>>> shutdown code path.  Will this work with seccomp?
>>>>
>>>> I actually don't know, but I'll test that as well. Can you run a test
>>>> with this patch and -netdev? I mean, if you're pointing that out you
>>>> might have a scenario already setup, right?
>>>>
>>>> Thanks!
>>>
>>> This uses exec() in net/tap.c.
>>>
>>> I think if we're going to introduce a sandbox environment that restricts
>>> existing QEMU behavior, then we have to introduce a new argument to the
>>> -sandbox option.  So for example, "-sandbox on" would continue to use
>>> the whitelist that allows everything in QEMU to work (or at least it
>>> should :).  And something like "-sandbox on,strict=on" would use the
>>> whitelist + blacklist.
>>>
>>> If this is acceptable though, then I wonder how we could go about adding
>>> new syscalls to the blacklist in future QEMU releases without regressing
>>> "-sandbox on,strict=on".
>>
>> Maybe a better approach is to provide support that allows libvirt to
>> define the blacklist and pass it to QEMU?
>
> FYI: I didn't want to mention this until I had some patches ready to post, but
> I'm currently working on adding syscall filtering, via libseccomp, to libvirt.
> I hope to get an initial RFC-quality patch out "soon".
>

Paul, if you need any help with Qemu and/or testing, please let me know. 
I would be glad to help :) When you post your RFC to libvirt mailing 
list please add me as CC.

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 20:05       ` Eduardo Otubo
@ 2013-09-03 21:08         ` Corey Bryant
  0 siblings, 0 replies; 19+ messages in thread
From: Corey Bryant @ 2013-09-03 21:08 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, Stefan Hajnoczi, wad, qemu-devel



On 09/03/2013 04:05 PM, Eduardo Otubo wrote:
>
>
> On 09/03/2013 03:02 PM, Corey Bryant wrote:
>>
>>
>> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
>>>
>>>
>>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
>>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
>>>>> Now there's a second whitelist, right before the vcpu starts. The
>>>>> second
>>>>> whitelist is the same as the first one, except for exec() and
>>>>> select().
>>>>
>>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
>>>> shutdown code path.  Will this work with seccomp?
>>>
>>> I actually don't know, but I'll test that as well. Can you run a test
>>> with this patch and -netdev? I mean, if you're pointing that out you
>>> might have a scenario already setup, right?
>>>
>>> Thanks!
>>>
>>
>> This uses exec() in net/tap.c.
>>
>> I think if we're going to introduce a sandbox environment that restricts
>> existing QEMU behavior, then we have to introduce a new argument to the
>> -sandbox option.  So for example, "-sandbox on" would continue to use
>> the whitelist that allows everything in QEMU to work (or at least it
>> should :).  And something like "-sandbox on,strict=on" would use the
>> whitelist + blacklist.
>
> I think tihs is very reasonable. I'll working on implementing this
> options for v2.
>
>>
>> If this is acceptable though, then I wonder how we could go about adding
>> new syscalls to the blacklist in future QEMU releases without regressing
>> "-sandbox on,strict=on".
>>
>> By the way, are any test buckets running regularly with -sandbox on?
>
> I am running tests with virt-test. Need to come up with a script that
> checks for unused syscalls, though.
>

Would it be possible to submit a patch to turn on -sandbox for some/all 
QEMU virt-test tests?  That would enable regular regression runs that 
aren't dependent on you.  Plus it would be a good proof point of the 
QEMU seccomp support if the tests run successfully.

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH] seccomp: adding a second whitelist
  2013-09-03 20:07           ` Eduardo Otubo
@ 2013-09-03 21:49             ` Paul Moore
  0 siblings, 0 replies; 19+ messages in thread
From: Paul Moore @ 2013-09-03 21:49 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: Stefan Hajnoczi, Corey Bryant, wad, qemu-devel

On Tuesday, September 03, 2013 05:07:53 PM Eduardo Otubo wrote:
> On 09/03/2013 03:21 PM, Paul Moore wrote:
> > On Tuesday, September 03, 2013 02:08:28 PM Corey Bryant wrote:
> >> On 09/03/2013 02:02 PM, Corey Bryant wrote:
> >>> On 08/30/2013 10:21 AM, Eduardo Otubo wrote:
> >>>> On 08/29/2013 05:34 AM, Stefan Hajnoczi wrote:
> >>>>> On Wed, Aug 28, 2013 at 10:04:32PM -0300, Eduardo Otubo wrote:
> >>>>>> Now there's a second whitelist, right before the vcpu starts. The
> >>>>>> second
> >>>>>> whitelist is the same as the first one, except for exec() and
> >>>>>> select().
> >>>>> 
> >>>>> -netdev tap,downscript=/path/to/script requires exec() in the QEMU
> >>>>> shutdown code path.  Will this work with seccomp?
> >>>> 
> >>>> I actually don't know, but I'll test that as well. Can you run a test
> >>>> with this patch and -netdev? I mean, if you're pointing that out you
> >>>> might have a scenario already setup, right?
> >>>> 
> >>>> Thanks!
> >>> 
> >>> This uses exec() in net/tap.c.
> >>> 
> >>> I think if we're going to introduce a sandbox environment that restricts
> >>> existing QEMU behavior, then we have to introduce a new argument to the
> >>> -sandbox option.  So for example, "-sandbox on" would continue to use
> >>> the whitelist that allows everything in QEMU to work (or at least it
> >>> should :).  And something like "-sandbox on,strict=on" would use the
> >>> whitelist + blacklist.
> >>> 
> >>> If this is acceptable though, then I wonder how we could go about adding
> >>> new syscalls to the blacklist in future QEMU releases without regressing
> >>> "-sandbox on,strict=on".
> >> 
> >> Maybe a better approach is to provide support that allows libvirt to
> >> define the blacklist and pass it to QEMU?
> > 
> > FYI: I didn't want to mention this until I had some patches ready to post,
> > but I'm currently working on adding syscall filtering, via libseccomp, to
> > libvirt. I hope to get an initial RFC-quality patch out "soon".
> 
> Paul, if you need any help with Qemu and/or testing, please let me know.
> I would be glad to help :) When you post your RFC to libvirt mailing
> list please add me as CC.

Of course, I appreciate all the help I can get.  We can chat a bit more once 
the patches are posted.

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2013-09-03 21:50 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-08-29  1:04 [Qemu-devel] [PATCH] seccomp: adding a second whitelist Eduardo Otubo
2013-08-29  8:34 ` Stefan Hajnoczi
2013-08-29  8:56   ` Paolo Bonzini
2013-08-30 14:22     ` Eduardo Otubo
2013-08-30 14:21   ` Eduardo Otubo
2013-08-30 15:23     ` Stefan Hajnoczi
2013-08-30 15:42       ` Paul Moore
2013-09-02  9:05         ` Stefan Hajnoczi
2013-09-03 18:02     ` Corey Bryant
2013-09-03 18:08       ` Corey Bryant
2013-09-03 18:21         ` Paul Moore
2013-09-03 18:23           ` Corey Bryant
2013-09-03 20:07           ` Eduardo Otubo
2013-09-03 21:49             ` Paul Moore
2013-09-03 20:05       ` Eduardo Otubo
2013-09-03 21:08         ` Corey Bryant
2013-08-29 12:56 ` Paul Moore
2013-08-30 14:27   ` Eduardo Otubo
2013-08-30 15:32     ` Paul Moore

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.