All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCHv2 0/3] seccomp: adding blacklist support with command line
@ 2013-09-06 19:21 Eduardo Otubo
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
                   ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-06 19:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pmoore, coreyb, Eduardo Otubo

v2: The blacklist works exactly the opposite as the whitelist. I set the
default behaiour to SCMP_ACT_ALLOW and the exceptions to SCMP_ACT_KILL;
remembering it inherits the behavior from the previous installed whitelist.

This patch series also contain the command line support for this feature and
some minor fixes, all of them described in their own commit messages.

v1: The second whitelist is installed right before the vcpu starts, it contains all
the system calls the first one has except for exec() and select(), which are
big major syscalls that I could extensively test with virt-test and do not
cause any damage to the general execution.

The environment in which the second whitelist is installed seems to need less
system calls than the first, so the procedure here will be the same: Keep
testing with virt-test and get to the smallest list as possible.


Eduardo Otubo (3):
  seccomp: adding blacklist support
  seccomp: adding command line support for blacklist
  seccomp: general fixes

 include/sysemu/seccomp.h |  5 +++-
 qemu-options.hx          |  8 ++++---
 qemu-seccomp.c           | 60 +++++++++++++++++++++++++++++++++++++-----------
 vl.c                     | 31 +++++++++++++++++++++----
 4 files changed, 83 insertions(+), 21 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support
  2013-09-06 19:21 [Qemu-devel] [PATCHv2 0/3] seccomp: adding blacklist support with command line Eduardo Otubo
@ 2013-09-06 19:21 ` Eduardo Otubo
  2013-09-09  3:49   ` Lei Li
  2013-09-11 16:29   ` Corey Bryant
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist Eduardo Otubo
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes Eduardo Otubo
  2 siblings, 2 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-06 19:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pmoore, coreyb, Eduardo Otubo

Adding a system call blacklist right before the vcpus starts. This filter is
composed by the system calls that can't be executed after the guests are up.
This list should be refined as the whitelist is, with as much testing as we can
do using virt-test.

Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
---
 include/sysemu/seccomp.h |  5 ++++-
 qemu-seccomp.c           | 57 ++++++++++++++++++++++++++++++++++++++----------
 vl.c                     | 16 +++++++++++++-
 3 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 1189fa2..551ad12 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -15,8 +15,11 @@
 #ifndef QEMU_SECCOMP_H
 #define QEMU_SECCOMP_H
 
+#define WHITELIST 0
+#define BLACKLIST 1
+
 #include <seccomp.h>
 #include "qemu/osdep.h"
 
-int seccomp_start(void);
+int seccomp_start(int state);
 #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 37d38f8..5e85eb5 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -21,7 +21,7 @@ struct QemuSeccompSyscall {
     uint8_t priority;
 };
 
-static const struct QemuSeccompSyscall seccomp_whitelist[] = {
+static const struct QemuSeccompSyscall whitelist[] = {
     { SCMP_SYS(timer_settime), 255 },
     { SCMP_SYS(timer_gettime), 254 },
     { SCMP_SYS(futex), 253 },
@@ -221,32 +221,65 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
     { SCMP_SYS(arch_prctl), 240 }
 };
 
-int seccomp_start(void)
+static const struct QemuSeccompSyscall blacklist[] = {
+    { SCMP_SYS(execve), 255 }
+};
+
+static int process_list(scmp_filter_ctx *ctx,
+                        const struct QemuSeccompSyscall *list,
+                        unsigned int list_size, uint32_t action)
 {
     int rc = 0;
     unsigned int i = 0;
-    scmp_filter_ctx ctx;
 
-    ctx = seccomp_init(SCMP_ACT_KILL);
-    if (ctx == NULL) {
-        goto seccomp_return;
-    }
+    for (i = 0; i < list_size; i++) {
+        rc = seccomp_rule_add(ctx, action, list[i].num, 0);
+        if (rc < 0) {
+            goto seccomp_return;
+        }
 
-    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
-        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
+        rc = seccomp_syscall_priority(ctx, list[i].num,
+                                      list[i].priority);
         if (rc < 0) {
             goto seccomp_return;
         }
-        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
-                                      seccomp_whitelist[i].priority);
+    }
+
+seccomp_return:
+    return rc;
+}
+
+int seccomp_start(int list_type)
+{
+    int rc = 0;
+    scmp_filter_ctx ctx;
+
+    switch (list_type) {
+    case WHITELIST:
+        ctx = seccomp_init(SCMP_ACT_KILL);
+        if (ctx == NULL) {
+            goto seccomp_return;
+        }
+        rc = process_list(ctx, whitelist, ARRAY_SIZE(whitelist), SCMP_ACT_ALLOW);
         if (rc < 0) {
             goto seccomp_return;
         }
+        break;
+    case BLACKLIST:
+        ctx = seccomp_init(SCMP_ACT_ALLOW);
+        if (ctx == NULL) {
+            goto seccomp_return;
+        }
+        rc = process_list(ctx, blacklist, ARRAY_SIZE(blacklist), SCMP_ACT_KILL);
+        break;
+    default:
+        rc = -1;
+        goto seccomp_return;
     }
 
     rc = seccomp_load(ctx);
 
-  seccomp_return:
+seccomp_return:
     seccomp_release(ctx);
     return rc;
 }
diff --git a/vl.c b/vl.c
index b4b119a..02f7486 100644
--- a/vl.c
+++ b/vl.c
@@ -179,6 +179,7 @@ int main(int argc, char **argv)
 #define MAX_VIRTIO_CONSOLES 1
 #define MAX_SCLP_CONSOLES 1
 
+static bool enable_blacklist = false;
 static const char *data_dir[16];
 static int data_dir_idx;
 const char *bios_name = NULL;
@@ -1033,11 +1034,13 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
     /* FIXME: change this to true for 1.3 */
     if (qemu_opt_get_bool(opts, "enable", false)) {
 #ifdef CONFIG_SECCOMP
-        if (seccomp_start() < 0) {
+        if (seccomp_start(WHITELIST) < 0) {
             qerror_report(ERROR_CLASS_GENERIC_ERROR,
                           "failed to install seccomp syscall filter in the kernel");
             return -1;
         }
+
+        enable_blacklist = true;
 #else
         qerror_report(ERROR_CLASS_GENERIC_ERROR,
                       "sandboxing request but seccomp is not compiled into this build");
@@ -1765,12 +1768,23 @@ void vm_state_notify(int running, RunState state)
     }
 }
 
+static void install_seccomp_blacklist(void)
+{
+    if (enable_blacklist) {
+        if (seccomp_start(BLACKLIST) < 0) {
+            qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                          "failed to install seccomp syscall second level filter in the kernel");
+        }
+    }
+}
+
 void vm_start(void)
 {
     if (!runstate_is_running()) {
         cpu_enable_ticks();
         runstate_set(RUN_STATE_RUNNING);
         vm_state_notify(1, RUN_STATE_RUNNING);
+        install_seccomp_blacklist();
         resume_all_vcpus();
         monitor_protocol_event(QEVENT_RESUME, NULL);
     }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-06 19:21 [Qemu-devel] [PATCHv2 0/3] seccomp: adding blacklist support with command line Eduardo Otubo
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
@ 2013-09-06 19:21 ` Eduardo Otubo
  2013-09-11 16:45   ` Corey Bryant
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes Eduardo Otubo
  2 siblings, 1 reply; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-06 19:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pmoore, coreyb, Eduardo Otubo

New command line options for the seccomp blacklist feature:

 $ qemu -sandbox on[,strict=<on|off>]

The strict parameter will turn on or off the new system call blacklist

Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
---
 qemu-options.hx |  8 +++++---
 vl.c            | 11 ++++++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index d15338e..05485e1 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2978,13 +2978,15 @@ Old param mode (ARM only).
 ETEXI
 
 DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
-    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
+    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n"
+    "-sandbox on[,strict=<arg>]\n"
+    "                Enable seccomp mode 2 system call second level filter (default 'off').\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -sandbox @var{arg}
+@item -sandbox @var{arg}[,strict=@var{value}]
 @findex -sandbox
 Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
-disable it.  The default is 'off'.
+disable it.  The default is 'off'. 'strict=on' will enable second level filter (default is 'off').
 ETEXI
 
 DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
diff --git a/vl.c b/vl.c
index 02f7486..909f685 100644
--- a/vl.c
+++ b/vl.c
@@ -329,6 +329,9 @@ static QemuOptsList qemu_sandbox_opts = {
         {
             .name = "enable",
             .type = QEMU_OPT_BOOL,
+        },{
+            .name = "strict",
+            .type = QEMU_OPT_STRING,
         },
         { /* end of list */ }
     },
@@ -1031,6 +1034,7 @@ static int bt_parse(const char *opt)
 
 static int parse_sandbox(QemuOpts *opts, void *opaque)
 {
+    const char * strict_value = NULL;
     /* FIXME: change this to true for 1.3 */
     if (qemu_opt_get_bool(opts, "enable", false)) {
 #ifdef CONFIG_SECCOMP
@@ -1040,7 +1044,12 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
             return -1;
         }
 
-        enable_blacklist = true;
+        strict_value = qemu_opt_get(opts, "strict");
+        if (strict_value) {
+            if (!strcmp(strict_value, "on")) {
+                enable_blacklist = true;
+            }
+        }
 #else
         qerror_report(ERROR_CLASS_GENERIC_ERROR,
                       "sandboxing request but seccomp is not compiled into this build");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes
  2013-09-06 19:21 [Qemu-devel] [PATCHv2 0/3] seccomp: adding blacklist support with command line Eduardo Otubo
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist Eduardo Otubo
@ 2013-09-06 19:21 ` Eduardo Otubo
  2013-09-11 16:56   ` Corey Bryant
  2 siblings, 1 reply; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-06 19:21 UTC (permalink / raw)
  To: qemu-devel; +Cc: pmoore, coreyb, Eduardo Otubo

 1) On qemu-seccomp.c:255, the variable ctx was being used
uninitialized; now it's initialized with NULL and it's being checked at
the end of the function.

 2) Changed the name of the command line option from "enable" to
"sandbox" for a better understanding from user side.

Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
---
 qemu-seccomp.c | 5 +++--
 vl.c           | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index 5e85eb5..f39d636 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -252,7 +252,7 @@ seccomp_return:
 int seccomp_start(int list_type)
 {
     int rc = 0;
-    scmp_filter_ctx ctx;
+    scmp_filter_ctx ctx = NULL;
 
     switch (list_type) {
     case WHITELIST:
@@ -280,6 +280,7 @@ int seccomp_start(int list_type)
     rc = seccomp_load(ctx);
 
 seccomp_return:
-    seccomp_release(ctx);
+    if (!ctx)
+        seccomp_release(ctx);
     return rc;
 }
diff --git a/vl.c b/vl.c
index 909f685..129919d 100644
--- a/vl.c
+++ b/vl.c
@@ -323,11 +323,11 @@ static QemuOptsList qemu_rtc_opts = {
 
 static QemuOptsList qemu_sandbox_opts = {
     .name = "sandbox",
-    .implied_opt_name = "enable",
+    .implied_opt_name = "sandbox",
     .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
     .desc = {
         {
-            .name = "enable",
+            .name = "sandbox",
             .type = QEMU_OPT_BOOL,
         },{
             .name = "strict",
@@ -1036,7 +1036,7 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
 {
     const char * strict_value = NULL;
     /* FIXME: change this to true for 1.3 */
-    if (qemu_opt_get_bool(opts, "enable", false)) {
+    if (qemu_opt_get_bool(opts, "sandbox", false)) {
 #ifdef CONFIG_SECCOMP
         if (seccomp_start(WHITELIST) < 0) {
             qerror_report(ERROR_CLASS_GENERIC_ERROR,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
@ 2013-09-09  3:49   ` Lei Li
  2013-09-11 16:29   ` Corey Bryant
  1 sibling, 0 replies; 25+ messages in thread
From: Lei Li @ 2013-09-09  3:49 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, coreyb, qemu-devel

On 09/07/2013 03:21 AM, Eduardo Otubo wrote:
> Adding a system call blacklist right before the vcpus starts. This filter is
> composed by the system calls that can't be executed after the guests are up.
> This list should be refined as the whitelist is, with as much testing as we can
> do using virt-test.
>
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> ---
>   include/sysemu/seccomp.h |  5 ++++-
>   qemu-seccomp.c           | 57 ++++++++++++++++++++++++++++++++++++++----------
>   vl.c                     | 16 +++++++++++++-
>   3 files changed, 64 insertions(+), 14 deletions(-)
>
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 1189fa2..551ad12 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -15,8 +15,11 @@
>   #ifndef QEMU_SECCOMP_H
>   #define QEMU_SECCOMP_H
>
> +#define WHITELIST 0
> +#define BLACKLIST 1
> +
>   #include <seccomp.h>
>   #include "qemu/osdep.h"
>
> -int seccomp_start(void);
> +int seccomp_start(int state);
>   #endif
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 37d38f8..5e85eb5 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -21,7 +21,7 @@ struct QemuSeccompSyscall {
>       uint8_t priority;
>   };
>
> -static const struct QemuSeccompSyscall seccomp_whitelist[] = {
> +static const struct QemuSeccompSyscall whitelist[] = {
>       { SCMP_SYS(timer_settime), 255 },
>       { SCMP_SYS(timer_gettime), 254 },
>       { SCMP_SYS(futex), 253 },
> @@ -221,32 +221,65 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>       { SCMP_SYS(arch_prctl), 240 }
>   };
>
> -int seccomp_start(void)
> +static const struct QemuSeccompSyscall blacklist[] = {
> +    { SCMP_SYS(execve), 255 }
> +};
> +
> +static int process_list(scmp_filter_ctx *ctx,
> +                        const struct QemuSeccompSyscall *list,
> +                        unsigned int list_size, uint32_t action)
>   {
>       int rc = 0;
>       unsigned int i = 0;
> -    scmp_filter_ctx ctx;
>
> -    ctx = seccomp_init(SCMP_ACT_KILL);
> -    if (ctx == NULL) {
> -        goto seccomp_return;
> -    }
> +    for (i = 0; i < list_size; i++) {
> +        rc = seccomp_rule_add(ctx, action, list[i].num, 0);
> +        if (rc < 0) {
> +            goto seccomp_return;
> +        }
>
> -    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
> -        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
> +        rc = seccomp_syscall_priority(ctx, list[i].num,
> +                                      list[i].priority);
>           if (rc < 0) {
>               goto seccomp_return;
>           }
> -        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
> -                                      seccomp_whitelist[i].priority);
> +    }
> +
> +seccomp_return:
> +    return rc;
> +}
> +
> +int seccomp_start(int list_type)
> +{
> +    int rc = 0;
> +    scmp_filter_ctx ctx;
> +
> +    switch (list_type) {
> +    case WHITELIST:
> +        ctx = seccomp_init(SCMP_ACT_KILL);
> +        if (ctx == NULL) {
> +            goto seccomp_return;
> +        }
> +        rc = process_list(ctx, whitelist, ARRAY_SIZE(whitelist), SCMP_ACT_ALLOW);
>           if (rc < 0) {
>               goto seccomp_return;
>           }
> +        break;
> +    case BLACKLIST:
> +        ctx = seccomp_init(SCMP_ACT_ALLOW);
> +        if (ctx == NULL) {
> +            goto seccomp_return;
> +        }
> +        rc = process_list(ctx, blacklist, ARRAY_SIZE(blacklist), SCMP_ACT_KILL);
> +        break;
> +    default:
> +        rc = -1;
> +        goto seccomp_return;
>       }
>
>       rc = seccomp_load(ctx);
>
> -  seccomp_return:
> +seccomp_return:
>       seccomp_release(ctx);
>       return rc;
>   }
> diff --git a/vl.c b/vl.c
> index b4b119a..02f7486 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -179,6 +179,7 @@ int main(int argc, char **argv)
>   #define MAX_VIRTIO_CONSOLES 1
>   #define MAX_SCLP_CONSOLES 1
>
> +static bool enable_blacklist = false;
>   static const char *data_dir[16];
>   static int data_dir_idx;
>   const char *bios_name = NULL;
> @@ -1033,11 +1034,13 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
>       /* FIXME: change this to true for 1.3 */
>       if (qemu_opt_get_bool(opts, "enable", false)) {
>   #ifdef CONFIG_SECCOMP
> -        if (seccomp_start() < 0) {
> +        if (seccomp_start(WHITELIST) < 0) {
>               qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                             "failed to install seccomp syscall filter in the kernel");
>               return -1;
>           }
> +
> +        enable_blacklist = true;
>   #else
>           qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                         "sandboxing request but seccomp is not compiled into this build");
> @@ -1765,12 +1768,23 @@ void vm_state_notify(int running, RunState state)
>       }
>   }
>
> +static void install_seccomp_blacklist(void)
> +{
> +    if (enable_blacklist) {
> +        if (seccomp_start(BLACKLIST) < 0) {
> +            qerror_report(ERROR_CLASS_GENERIC_ERROR,
> +                          "failed to install seccomp syscall second level filter in the kernel");
> +        }
> +    }
> +}
> +
>   void vm_start(void)
>   {
>       if (!runstate_is_running()) {
>           cpu_enable_ticks();
>           runstate_set(RUN_STATE_RUNNING);
>           vm_state_notify(1, RUN_STATE_RUNNING);
> +        install_seccomp_blacklist();

Hi Eduardo,

Looks good to me, especially the implementation of blacklist than the previous
one that the second whitelist is the same as the first one except that two
system calls.

Just one question:
If the seccomp_start(WHITELIST) failed, it will lead to the QEMU exit(1).
But seems that if this seccomp_start(BLACKLIST) failed, it won't, just report
an error that the second level filter in kernel failed to install.
Is this the expected behaver?
  

>           resume_all_vcpus();
>           monitor_protocol_event(QEVENT_RESUME, NULL);
>       }


-- 
Lei

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
  2013-09-09  3:49   ` Lei Li
@ 2013-09-11 16:29   ` Corey Bryant
  1 sibling, 0 replies; 25+ messages in thread
From: Corey Bryant @ 2013-09-11 16:29 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, qemu-devel



On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
> Adding a system call blacklist right before the vcpus starts. This filter is
> composed by the system calls that can't be executed after the guests are up.
> This list should be refined as the whitelist is, with as much testing as we can
> do using virt-test.

It would be useful to point out in the commit message and maybe also in 
the code comments that the blacklist essentially reduces the previously 
installed whitelist.

>
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> ---
>   include/sysemu/seccomp.h |  5 ++++-
>   qemu-seccomp.c           | 57 ++++++++++++++++++++++++++++++++++++++----------
>   vl.c                     | 16 +++++++++++++-
>   3 files changed, 64 insertions(+), 14 deletions(-)
>
> diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
> index 1189fa2..551ad12 100644
> --- a/include/sysemu/seccomp.h
> +++ b/include/sysemu/seccomp.h
> @@ -15,8 +15,11 @@
>   #ifndef QEMU_SECCOMP_H
>   #define QEMU_SECCOMP_H
>
> +#define WHITELIST 0
> +#define BLACKLIST 1
> +
>   #include <seccomp.h>
>   #include "qemu/osdep.h"
>
> -int seccomp_start(void);
> +int seccomp_start(int state);

This parameter is more of a list type than a state.

>   #endif
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 37d38f8..5e85eb5 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -21,7 +21,7 @@ struct QemuSeccompSyscall {
>       uint8_t priority;
>   };
>
> -static const struct QemuSeccompSyscall seccomp_whitelist[] = {
> +static const struct QemuSeccompSyscall whitelist[] = {
>       { SCMP_SYS(timer_settime), 255 },
>       { SCMP_SYS(timer_gettime), 254 },
>       { SCMP_SYS(futex), 253 },
> @@ -221,32 +221,65 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = {
>       { SCMP_SYS(arch_prctl), 240 }
>   };
>
> -int seccomp_start(void)
> +static const struct QemuSeccompSyscall blacklist[] = {
> +    { SCMP_SYS(execve), 255 }
> +};
> +
> +static int process_list(scmp_filter_ctx *ctx,
> +                        const struct QemuSeccompSyscall *list,
> +                        unsigned int list_size, uint32_t action)
>   {
>       int rc = 0;
>       unsigned int i = 0;
> -    scmp_filter_ctx ctx;
>
> -    ctx = seccomp_init(SCMP_ACT_KILL);
> -    if (ctx == NULL) {
> -        goto seccomp_return;
> -    }
> +    for (i = 0; i < list_size; i++) {
> +        rc = seccomp_rule_add(ctx, action, list[i].num, 0);
> +        if (rc < 0) {
> +            goto seccomp_return;
> +        }
>
> -    for (i = 0; i < ARRAY_SIZE(seccomp_whitelist); i++) {
> -        rc = seccomp_rule_add(ctx, SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
> +        rc = seccomp_syscall_priority(ctx, list[i].num,
> +                                      list[i].priority);
>           if (rc < 0) {
>               goto seccomp_return;
>           }
> -        rc = seccomp_syscall_priority(ctx, seccomp_whitelist[i].num,
> -                                      seccomp_whitelist[i].priority);
> +    }
> +
> +seccomp_return:
> +    return rc;
> +}
> +
> +int seccomp_start(int list_type)
> +{
> +    int rc = 0;
> +    scmp_filter_ctx ctx;
> +
> +    switch (list_type) {
> +    case WHITELIST:
> +        ctx = seccomp_init(SCMP_ACT_KILL);
> +        if (ctx == NULL) {
> +            goto seccomp_return;
> +        }
> +        rc = process_list(ctx, whitelist, ARRAY_SIZE(whitelist), SCMP_ACT_ALLOW);
>           if (rc < 0) {
>               goto seccomp_return;
>           }
> +        break;
> +    case BLACKLIST:
> +        ctx = seccomp_init(SCMP_ACT_ALLOW);
> +        if (ctx == NULL) {
> +            goto seccomp_return;
> +        }
> +        rc = process_list(ctx, blacklist, ARRAY_SIZE(blacklist), SCMP_ACT_KILL);
> +        break;
> +    default:
> +        rc = -1;
> +        goto seccomp_return;
>       }
>
>       rc = seccomp_load(ctx);
>
> -  seccomp_return:
> +seccomp_return:
>       seccomp_release(ctx);
>       return rc;
>   }
> diff --git a/vl.c b/vl.c
> index b4b119a..02f7486 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -179,6 +179,7 @@ int main(int argc, char **argv)
>   #define MAX_VIRTIO_CONSOLES 1
>   #define MAX_SCLP_CONSOLES 1
>
> +static bool enable_blacklist = false;
>   static const char *data_dir[16];
>   static int data_dir_idx;
>   const char *bios_name = NULL;
> @@ -1033,11 +1034,13 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
>       /* FIXME: change this to true for 1.3 */
>       if (qemu_opt_get_bool(opts, "enable", false)) {
>   #ifdef CONFIG_SECCOMP
> -        if (seccomp_start() < 0) {
> +        if (seccomp_start(WHITELIST) < 0) {
>               qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                             "failed to install seccomp syscall filter in the kernel");
>               return -1;
>           }
> +
> +        enable_blacklist = true;

The blacklist shouldn't be enabled until the command line option asks 
for it to be enabled in the next patch.

>   #else
>           qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                         "sandboxing request but seccomp is not compiled into this build");
> @@ -1765,12 +1768,23 @@ void vm_state_notify(int running, RunState state)
>       }
>   }
>
> +static void install_seccomp_blacklist(void)
> +{
> +    if (enable_blacklist) {
> +        if (seccomp_start(BLACKLIST) < 0) {
> +            qerror_report(ERROR_CLASS_GENERIC_ERROR,
> +                          "failed to install seccomp syscall second level filter in the kernel");

As Lei Li mentioned, should you return -1 here like the whitelist 
failure path does?

> +        }
> +    }
> +}
> +
>   void vm_start(void)
>   {
>       if (!runstate_is_running()) {
>           cpu_enable_ticks();
>           runstate_set(RUN_STATE_RUNNING);
>           vm_state_notify(1, RUN_STATE_RUNNING);
> +        install_seccomp_blacklist();

The blacklist isn't installed when the incoming migration path is taken. 
  In other words, should install_seccomp_blacklist() be called before 
the "if (incoming)" check in main()?

>           resume_all_vcpus();
>           monitor_protocol_event(QEVENT_RESUME, NULL);
>       }
>

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist Eduardo Otubo
@ 2013-09-11 16:45   ` Corey Bryant
  2013-09-11 16:49     ` Daniel P. Berrange
  0 siblings, 1 reply; 25+ messages in thread
From: Corey Bryant @ 2013-09-11 16:45 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, qemu-devel



On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
> New command line options for the seccomp blacklist feature:
>
>   $ qemu -sandbox on[,strict=<on|off>]
>
> The strict parameter will turn on or off the new system call blacklist

I mentioned this before but I'll say it again since I think it needs to 
be discussed.  Since this regresses support (it'll prevent -net bridge 
and -net tap from using execv) the concern I have with the strict=on|off 
option is whether or not we will have the flexibility to modify the 
blacklist once QEMU is released with this support.  Of course we should 
be able to add more syscalls to the blacklist as long as they don't 
regress QEMU functionality.  But if we want to add a syscall that does 
regress QEMU functionality, I think we'd have to add a new command line 
option, which doesn't seem desirable.

So a more flexible approach may be necessary.  Maybe the blacklist 
should be passed on the command line, which would enable it to be 
defined by libvirt and passed to QEMU.  I know Paul is working on 
something for libvirt so maybe that answers this question.

>
> Signed-off-by: Eduardo Otubo <otubo@linux.vnet.ibm.com>
> ---
>   qemu-options.hx |  8 +++++---
>   vl.c            | 11 ++++++++++-
>   2 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index d15338e..05485e1 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2978,13 +2978,15 @@ Old param mode (ARM only).
>   ETEXI
>
>   DEF("sandbox", HAS_ARG, QEMU_OPTION_sandbox, \
> -    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n",
> +    "-sandbox <arg>  Enable seccomp mode 2 system call filter (default 'off').\n"
> +    "-sandbox on[,strict=<arg>]\n"
> +    "                Enable seccomp mode 2 system call second level filter (default 'off').\n",

Does this need to mention the QEMU features restricted by the blacklist?

>       QEMU_ARCH_ALL)
>   STEXI
> -@item -sandbox @var{arg}
> +@item -sandbox @var{arg}[,strict=@var{value}]
>   @findex -sandbox
>   Enable Seccomp mode 2 system call filter. 'on' will enable syscall filtering and 'off' will
> -disable it.  The default is 'off'.
> +disable it.  The default is 'off'. 'strict=on' will enable second level filter (default is 'off').

And here too?

>   ETEXI
>
>   DEF("readconfig", HAS_ARG, QEMU_OPTION_readconfig,
> diff --git a/vl.c b/vl.c
> index 02f7486..909f685 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -329,6 +329,9 @@ static QemuOptsList qemu_sandbox_opts = {
>           {
>               .name = "enable",
>               .type = QEMU_OPT_BOOL,
> +        },{
> +            .name = "strict",
> +            .type = QEMU_OPT_STRING,
>           },
>           { /* end of list */ }
>       },
> @@ -1031,6 +1034,7 @@ static int bt_parse(const char *opt)
>
>   static int parse_sandbox(QemuOpts *opts, void *opaque)
>   {
> +    const char * strict_value = NULL;
>       /* FIXME: change this to true for 1.3 */
>       if (qemu_opt_get_bool(opts, "enable", false)) {
>   #ifdef CONFIG_SECCOMP
> @@ -1040,7 +1044,12 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
>               return -1;
>           }
>
> -        enable_blacklist = true;
> +        strict_value = qemu_opt_get(opts, "strict");
> +        if (strict_value) {
> +            if (!strcmp(strict_value, "on")) {
> +                enable_blacklist = true;
> +            }
> +        }
>   #else
>           qerror_report(ERROR_CLASS_GENERIC_ERROR,
>                         "sandboxing request but seccomp is not compiled into this build");
>

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-11 16:45   ` Corey Bryant
@ 2013-09-11 16:49     ` Daniel P. Berrange
  2013-09-17 13:01       ` Eduardo Otubo
  0 siblings, 1 reply; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-11 16:49 UTC (permalink / raw)
  To: Corey Bryant; +Cc: pmoore, qemu-devel, Eduardo Otubo

On Wed, Sep 11, 2013 at 12:45:54PM -0400, Corey Bryant wrote:
> 
> 
> On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
> >New command line options for the seccomp blacklist feature:
> >
> >  $ qemu -sandbox on[,strict=<on|off>]
> >
> >The strict parameter will turn on or off the new system call blacklist
> 
> I mentioned this before but I'll say it again since I think it needs
> to be discussed.  Since this regresses support (it'll prevent -net
> bridge and -net tap from using execv) the concern I have with the
> strict=on|off option is whether or not we will have the flexibility
> to modify the blacklist once QEMU is released with this support.  Of
> course we should be able to add more syscalls to the blacklist as
> long as they don't regress QEMU functionality.  But if we want to
> add a syscall that does regress QEMU functionality, I think we'd
> have to add a new command line option, which doesn't seem desirable.
> 
> So a more flexible approach may be necessary.  Maybe the blacklist
> should be passed on the command line, which would enable it to be
> defined by libvirt and passed to QEMU.  I know Paul is working on
> something for libvirt so maybe that answers this question.

On the face of it, I'm not at all a fan of the idea of libvirt having
to pass a syscall whitelist/blacklist to QEMU. IMHO that would be
exposing too much knowledge of QEMU impl details to libvirt.

Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes
  2013-09-06 19:21 ` [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes Eduardo Otubo
@ 2013-09-11 16:56   ` Corey Bryant
  2013-10-09  0:40     ` Eduardo Otubo
  0 siblings, 1 reply; 25+ messages in thread
From: Corey Bryant @ 2013-09-11 16:56 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, qemu-devel



On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
>   1) On qemu-seccomp.c:255, the variable ctx was being used
> uninitialized; now it's initialized with NULL and it's being checked at
> the end of the function.
>
>   2) Changed the name of the command line option from "enable" to
> "sandbox" for a better understanding from user side.
>
> Signed-off-by: Eduardo Otubo<otubo@linux.vnet.ibm.com>
> ---
>   qemu-seccomp.c | 5 +++--
>   vl.c           | 6 +++---
>   2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
> index 5e85eb5..f39d636 100644
> --- a/qemu-seccomp.c
> +++ b/qemu-seccomp.c
> @@ -252,7 +252,7 @@ seccomp_return:
>   int seccomp_start(int list_type)
>   {
>       int rc = 0;
> -    scmp_filter_ctx ctx;
> +    scmp_filter_ctx ctx = NULL;
>
>       switch (list_type) {
>       case WHITELIST:
> @@ -280,6 +280,7 @@ int seccomp_start(int list_type)
>       rc = seccomp_load(ctx);
>
>   seccomp_return:
> -    seccomp_release(ctx);
> +    if (!ctx)

You need to remove the ! from this check.

> +        seccomp_release(ctx);
>       return rc;
>   }
> diff --git a/vl.c b/vl.c
> index 909f685..129919d 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -323,11 +323,11 @@ static QemuOptsList qemu_rtc_opts = {
>
>   static QemuOptsList qemu_sandbox_opts = {
>       .name = "sandbox",
> -    .implied_opt_name = "enable",
> +    .implied_opt_name = "sandbox",

So does this technically make it -sandbox,sandbox=on?  If I understand 
correctly, I don't think the implied option is ever seen or used by the 
user anyway so it probably doesn't matter.  But I don't know if it's 
worth changing.

>       .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
>       .desc = {
>           {
> -            .name = "enable",
> +            .name = "sandbox",
>               .type = QEMU_OPT_BOOL,
>           },{
>               .name = "strict",
> @@ -1036,7 +1036,7 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
>   {
>       const char * strict_value = NULL;
>       /* FIXME: change this to true for 1.3 */
> -    if (qemu_opt_get_bool(opts, "enable", false)) {
> +    if (qemu_opt_get_bool(opts, "sandbox", false)) {
>   #ifdef CONFIG_SECCOMP
>           if (seccomp_start(WHITELIST) < 0) {
>               qerror_report(ERROR_CLASS_GENERIC_ERROR,
> -- 1.8.3.1
>

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-11 16:49     ` Daniel P. Berrange
@ 2013-09-17 13:01       ` Eduardo Otubo
  2013-09-17 13:06         ` Daniel P. Berrange
  0 siblings, 1 reply; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-17 13:01 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: pmoore, Corey Bryant, qemu-devel



On 09/11/2013 01:49 PM, Daniel P. Berrange wrote:
> On Wed, Sep 11, 2013 at 12:45:54PM -0400, Corey Bryant wrote:
>>
>>
>> On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
>>> New command line options for the seccomp blacklist feature:
>>>
>>>   $ qemu -sandbox on[,strict=<on|off>]
>>>
>>> The strict parameter will turn on or off the new system call blacklist
>>
>> I mentioned this before but I'll say it again since I think it needs
>> to be discussed.  Since this regresses support (it'll prevent -net
>> bridge and -net tap from using execv) the concern I have with the
>> strict=on|off option is whether or not we will have the flexibility
>> to modify the blacklist once QEMU is released with this support.  Of
>> course we should be able to add more syscalls to the blacklist as
>> long as they don't regress QEMU functionality.  But if we want to
>> add a syscall that does regress QEMU functionality, I think we'd
>> have to add a new command line option, which doesn't seem desirable.
>>
>> So a more flexible approach may be necessary.  Maybe the blacklist
>> should be passed on the command line, which would enable it to be
>> defined by libvirt and passed to QEMU.  I know Paul is working on
>> something for libvirt so maybe that answers this question.

Paul, what exactly are you planning to add to libvirt? I'm not a big fan 
of using qemu command line to pass syscalls for blacklist as arguments, 
but I can't see other way to avoid problems (like -net bridge / -net 
tap) from happening.

>
> On the face of it, I'm not at all a fan of the idea of libvirt having
> to pass a syscall whitelist/blacklist to QEMU. IMHO that would be
> exposing too much knowledge of QEMU impl details to libvirt.
>
> Daniel
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 13:01       ` Eduardo Otubo
@ 2013-09-17 13:06         ` Daniel P. Berrange
  2013-09-17 14:43           ` Paul Moore
  0 siblings, 1 reply; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-17 13:06 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: pmoore, Corey Bryant, qemu-devel

On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
> 
> 
> On 09/11/2013 01:49 PM, Daniel P. Berrange wrote:
> >On Wed, Sep 11, 2013 at 12:45:54PM -0400, Corey Bryant wrote:
> >>
> >>
> >>On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
> >>>New command line options for the seccomp blacklist feature:
> >>>
> >>>  $ qemu -sandbox on[,strict=<on|off>]
> >>>
> >>>The strict parameter will turn on or off the new system call blacklist
> >>
> >>I mentioned this before but I'll say it again since I think it needs
> >>to be discussed.  Since this regresses support (it'll prevent -net
> >>bridge and -net tap from using execv) the concern I have with the
> >>strict=on|off option is whether or not we will have the flexibility
> >>to modify the blacklist once QEMU is released with this support.  Of
> >>course we should be able to add more syscalls to the blacklist as
> >>long as they don't regress QEMU functionality.  But if we want to
> >>add a syscall that does regress QEMU functionality, I think we'd
> >>have to add a new command line option, which doesn't seem desirable.
> >>
> >>So a more flexible approach may be necessary.  Maybe the blacklist
> >>should be passed on the command line, which would enable it to be
> >>defined by libvirt and passed to QEMU.  I know Paul is working on
> >>something for libvirt so maybe that answers this question.
> 
> Paul, what exactly are you planning to add to libvirt? I'm not a big
> fan of using qemu command line to pass syscalls for blacklist as
> arguments, but I can't see other way to avoid problems (like -net
> bridge / -net tap) from happening.

IMHO, if libvirt is enabling seccomp, then making all possible cli
args work is a non-goal. If there are things which require privileges
seccomp is blocking, then libvirt should avoid using them. eg by making
use of FD passing where appropriate to reduce privileges qemu needs.

Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 13:06         ` Daniel P. Berrange
@ 2013-09-17 14:43           ` Paul Moore
  2013-09-17 17:14             ` Eduardo Otubo
  0 siblings, 1 reply; 25+ messages in thread
From: Paul Moore @ 2013-09-17 14:43 UTC (permalink / raw)
  To: Daniel P. Berrange, Eduardo Otubo; +Cc: Corey Bryant, qemu-devel

On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
> On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
>
> > Paul, what exactly are you planning to add to libvirt? I'm not a big
> > fan of using qemu command line to pass syscalls for blacklist as
> > arguments, but I can't see other way to avoid problems (like -net
> > bridge / -net tap) from happening.

At present, and as far as I'm concerned pretty much everything is open for 
discussion, the code works similar to the libvirt network filters.  You create 
a separate XML configuration file which defines the filter and you reference 
that filter from the domain's XML configuration.  When a QEMU/KVM or LXC based 
domain starts it uses libseccomp to create the seccomp filter and then loads 
it into the kernel after the fork but before the domain is exec'd.

There are no command line arguments passed to QEMU.  This work can co-exist 
with the QEMU seccomp filters without problem.

The original goal of this effort wasn't to add libvirt syscall filtering for 
QEMU, but rather for LXC; adding QEMU support just happened to be a trivial 
patch once the LXC support was added.

(I also apologize for the delays, I hit a snag with an existing problem on 
libvirt which stopped work and then some other BZs grabbed my attention...)

> IMHO, if libvirt is enabling seccomp, then making all possible cli
> args work is a non-goal. If there are things which require privileges
> seccomp is blocking, then libvirt should avoid using them. eg by making
> use of FD passing where appropriate to reduce privileges qemu needs.

I agree.

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 14:43           ` Paul Moore
@ 2013-09-17 17:14             ` Eduardo Otubo
  2013-09-17 18:08               ` Eduardo Otubo
                                 ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-17 17:14 UTC (permalink / raw)
  To: Paul Moore; +Cc: Corey Bryant, qemu-devel



On 09/17/2013 11:43 AM, Paul Moore wrote:
> On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
>> On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
>>
>>> Paul, what exactly are you planning to add to libvirt? I'm not a big
>>> fan of using qemu command line to pass syscalls for blacklist as
>>> arguments, but I can't see other way to avoid problems (like -net
>>> bridge / -net tap) from happening.
>
> At present, and as far as I'm concerned pretty much everything is open for
> discussion, the code works similar to the libvirt network filters.  You create
> a separate XML configuration file which defines the filter and you reference
> that filter from the domain's XML configuration.  When a QEMU/KVM or LXC based
> domain starts it uses libseccomp to create the seccomp filter and then loads
> it into the kernel after the fork but before the domain is exec'd.

Clever approach. I tihnk a possible way to do this is something like:

  -sandbox 
-on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]

	where:

[,whitelist=qemu_whitelist.conf] will override default whitelist filter
[,blacklist=blacklist.conf] will override default blacklist filter

But when we add seccomp support for qemu on libvirt, we make sure to 
just add -sandbox off and use Paul's approach.

Is that a reasonable approach? What do you think?

>
> There are no command line arguments passed to QEMU.  This work can co-exist
> with the QEMU seccomp filters without problem.
>
> The original goal of this effort wasn't to add libvirt syscall filtering for
> QEMU, but rather for LXC; adding QEMU support just happened to be a trivial
> patch once the LXC support was added.
>
> (I also apologize for the delays, I hit a snag with an existing problem on
> libvirt which stopped work and then some other BZs grabbed my attention...)
>
>> IMHO, if libvirt is enabling seccomp, then making all possible cli
>> args work is a non-goal. If there are things which require privileges
>> seccomp is blocking, then libvirt should avoid using them. eg by making
>> use of FD passing where appropriate to reduce privileges qemu needs.
>
> I agree.
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 17:14             ` Eduardo Otubo
@ 2013-09-17 18:08               ` Eduardo Otubo
  2013-09-17 19:17               ` Corey Bryant
  2013-09-18  7:35               ` Daniel P. Berrange
  2 siblings, 0 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-17 18:08 UTC (permalink / raw)
  To: Paul Moore; +Cc: Corey Bryant, qemu-devel



On 09/17/2013 02:14 PM, Eduardo Otubo wrote:
>
>
> On 09/17/2013 11:43 AM, Paul Moore wrote:
>> On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
>>> On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
>>>
>>>> Paul, what exactly are you planning to add to libvirt? I'm not a big
>>>> fan of using qemu command line to pass syscalls for blacklist as
>>>> arguments, but I can't see other way to avoid problems (like -net
>>>> bridge / -net tap) from happening.
>>
>> At present, and as far as I'm concerned pretty much everything is open
>> for
>> discussion, the code works similar to the libvirt network filters.
>> You create
>> a separate XML configuration file which defines the filter and you
>> reference
>> that filter from the domain's XML configuration.  When a QEMU/KVM or
>> LXC based
>> domain starts it uses libseccomp to create the seccomp filter and then
>> loads
>> it into the kernel after the fork but before the domain is exec'd.
>
> Clever approach. I tihnk a possible way to do this is something like:
>
>   -sandbox
> -on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]
>
>
>      where:
>
> [,whitelist=qemu_whitelist.conf] will override default whitelist filter
> [,blacklist=blacklist.conf] will override default blacklist filter
>
> But when we add seccomp support for qemu on libvirt, we make sure to
> just add -sandbox off and use Paul's approach.
>
> Is that a reasonable approach? What do you think?

This approach is also interesting from the test point of view. I'll be 
able to write more complex tests on virt-test. General tests like 
"remove one syscall at a time from whitelist and test" --without the 
need of sed'ing the code and recompiling every time, or even include new 
syscalls to the blacklist.

>
>>
>> There are no command line arguments passed to QEMU.  This work can
>> co-exist
>> with the QEMU seccomp filters without problem.
>>
>> The original goal of this effort wasn't to add libvirt syscall
>> filtering for
>> QEMU, but rather for LXC; adding QEMU support just happened to be a
>> trivial
>> patch once the LXC support was added.
>>
>> (I also apologize for the delays, I hit a snag with an existing
>> problem on
>> libvirt which stopped work and then some other BZs grabbed my
>> attention...)
>>
>>> IMHO, if libvirt is enabling seccomp, then making all possible cli
>>> args work is a non-goal. If there are things which require privileges
>>> seccomp is blocking, then libvirt should avoid using them. eg by making
>>> use of FD passing where appropriate to reduce privileges qemu needs.
>>
>> I agree.
>>
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 17:14             ` Eduardo Otubo
  2013-09-17 18:08               ` Eduardo Otubo
@ 2013-09-17 19:17               ` Corey Bryant
  2013-09-17 20:16                 ` Eduardo Otubo
  2013-09-18  7:38                 ` Daniel P. Berrange
  2013-09-18  7:35               ` Daniel P. Berrange
  2 siblings, 2 replies; 25+ messages in thread
From: Corey Bryant @ 2013-09-17 19:17 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: Paul Moore, qemu-devel



On 09/17/2013 01:14 PM, Eduardo Otubo wrote:
>
>
> On 09/17/2013 11:43 AM, Paul Moore wrote:
>> On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
>>> On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
>>>
>>>> Paul, what exactly are you planning to add to libvirt? I'm not a big
>>>> fan of using qemu command line to pass syscalls for blacklist as
>>>> arguments, but I can't see other way to avoid problems (like -net
>>>> bridge / -net tap) from happening.
>>
>> At present, and as far as I'm concerned pretty much everything is open
>> for
>> discussion, the code works similar to the libvirt network filters.
>> You create
>> a separate XML configuration file which defines the filter and you
>> reference
>> that filter from the domain's XML configuration.  When a QEMU/KVM or
>> LXC based
>> domain starts it uses libseccomp to create the seccomp filter and then
>> loads
>> it into the kernel after the fork but before the domain is exec'd.
>
> Clever approach. I tihnk a possible way to do this is something like:
>
>   -sandbox
> -on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]
>
>
>      where:
>
> [,whitelist=qemu_whitelist.conf] will override default whitelist filter
> [,blacklist=blacklist.conf] will override default blacklist filter
>
> But when we add seccomp support for qemu on libvirt, we make sure to
> just add -sandbox off and use Paul's approach.
>
> Is that a reasonable approach? What do you think?
>

QEMU wouldn't require any changes for the approach Paul describes. The 
QEMU process that is exec'd by libvirt would be constrained by the 
filter that libvirt installed.

-- 
Regards,
Corey Bryant

>>
>> There are no command line arguments passed to QEMU.  This work can
>> co-exist
>> with the QEMU seccomp filters without problem.
>>
>> The original goal of this effort wasn't to add libvirt syscall
>> filtering for
>> QEMU, but rather for LXC; adding QEMU support just happened to be a
>> trivial
>> patch once the LXC support was added.
>>
>> (I also apologize for the delays, I hit a snag with an existing
>> problem on
>> libvirt which stopped work and then some other BZs grabbed my
>> attention...)
>>
>>> IMHO, if libvirt is enabling seccomp, then making all possible cli
>>> args work is a non-goal. If there are things which require privileges
>>> seccomp is blocking, then libvirt should avoid using them. eg by making
>>> use of FD passing where appropriate to reduce privileges qemu needs.
>>
>> I agree.
>>
>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 19:17               ` Corey Bryant
@ 2013-09-17 20:16                 ` Eduardo Otubo
  2013-09-18  7:38                 ` Daniel P. Berrange
  1 sibling, 0 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-09-17 20:16 UTC (permalink / raw)
  To: Corey Bryant; +Cc: Paul Moore, qemu-devel



On 09/17/2013 04:17 PM, Corey Bryant wrote:
>
>
> On 09/17/2013 01:14 PM, Eduardo Otubo wrote:
>>
>>
>> On 09/17/2013 11:43 AM, Paul Moore wrote:
>>> On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
>>>> On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
>>>>
>>>>> Paul, what exactly are you planning to add to libvirt? I'm not a big
>>>>> fan of using qemu command line to pass syscalls for blacklist as
>>>>> arguments, but I can't see other way to avoid problems (like -net
>>>>> bridge / -net tap) from happening.
>>>
>>> At present, and as far as I'm concerned pretty much everything is open
>>> for
>>> discussion, the code works similar to the libvirt network filters.
>>> You create
>>> a separate XML configuration file which defines the filter and you
>>> reference
>>> that filter from the domain's XML configuration.  When a QEMU/KVM or
>>> LXC based
>>> domain starts it uses libseccomp to create the seccomp filter and then
>>> loads
>>> it into the kernel after the fork but before the domain is exec'd.
>>
>> Clever approach. I tihnk a possible way to do this is something like:
>>
>>   -sandbox
>> -on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]
>>
>>
>>
>>      where:
>>
>> [,whitelist=qemu_whitelist.conf] will override default whitelist filter
>> [,blacklist=blacklist.conf] will override default blacklist filter
>>
>> But when we add seccomp support for qemu on libvirt, we make sure to
>> just add -sandbox off and use Paul's approach.
>>
>> Is that a reasonable approach? What do you think?
>>
>
> QEMU wouldn't require any changes for the approach Paul describes. The
> QEMU process that is exec'd by libvirt would be constrained by the
> filter that libvirt installed.
>

Yes, that is correct. But I'm thinking about the case when Qemu is run 
stand-alone, without libvirt. There must be a way to configure it 
without using a pre configured filter from libvirt.

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 17:14             ` Eduardo Otubo
  2013-09-17 18:08               ` Eduardo Otubo
  2013-09-17 19:17               ` Corey Bryant
@ 2013-09-18  7:35               ` Daniel P. Berrange
  2 siblings, 0 replies; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-18  7:35 UTC (permalink / raw)
  To: Eduardo Otubo; +Cc: Paul Moore, Corey Bryant, qemu-devel

On Tue, Sep 17, 2013 at 02:14:25PM -0300, Eduardo Otubo wrote:
> 
> 
> On 09/17/2013 11:43 AM, Paul Moore wrote:
> >On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
> >>On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
> >>
> >>>Paul, what exactly are you planning to add to libvirt? I'm not a big
> >>>fan of using qemu command line to pass syscalls for blacklist as
> >>>arguments, but I can't see other way to avoid problems (like -net
> >>>bridge / -net tap) from happening.
> >
> >At present, and as far as I'm concerned pretty much everything is open for
> >discussion, the code works similar to the libvirt network filters.  You create
> >a separate XML configuration file which defines the filter and you reference
> >that filter from the domain's XML configuration.  When a QEMU/KVM or LXC based
> >domain starts it uses libseccomp to create the seccomp filter and then loads
> >it into the kernel after the fork but before the domain is exec'd.
> 
> Clever approach. I tihnk a possible way to do this is something like:
> 
>  -sandbox -on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]
> 
> 	where:
> 
> [,whitelist=qemu_whitelist.conf] will override default whitelist filter
> [,blacklist=blacklist.conf] will override default blacklist filter
> 
> But when we add seccomp support for qemu on libvirt, we make sure to
> just add -sandbox off and use Paul's approach.
> 
> Is that a reasonable approach? What do you think?

IMHO the same problem exists for non-libvirt apps using QEMU. Exposing
lists of syscalls as a config option requires applications using QEMU
to know far too much about QEMU's internal implementation details. With
this syntax either apps have to read the source to find out which syscalls
to allow, or they have to use trial & error launching QEMU repeatedly
to see what breaks. Neither of these are nice to applications. IMHO any
configuration of syscalls lists should be exclusively QEMU's responsibility.

What is your actual goal here ? If the goal is to make it possible to
use arbitrary command line arguments, then IMHO, QEMU should just look
at the args given and automatically just "do the right thing" with the
syscall whitelists. Of course per my previous message, I think making
all possible args work under seccomp should be a non-goal.

> >There are no command line arguments passed to QEMU.  This work can co-exist
> >with the QEMU seccomp filters without problem.
> >
> >The original goal of this effort wasn't to add libvirt syscall filtering for
> >QEMU, but rather for LXC; adding QEMU support just happened to be a trivial
> >patch once the LXC support was added.
> >
> >(I also apologize for the delays, I hit a snag with an existing problem on
> >libvirt which stopped work and then some other BZs grabbed my attention...)
> >
> >>IMHO, if libvirt is enabling seccomp, then making all possible cli
> >>args work is a non-goal. If there are things which require privileges
> >>seccomp is blocking, then libvirt should avoid using them. eg by making
> >>use of FD passing where appropriate to reduce privileges qemu needs.
> >
> >I agree.


Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-17 19:17               ` Corey Bryant
  2013-09-17 20:16                 ` Eduardo Otubo
@ 2013-09-18  7:38                 ` Daniel P. Berrange
  2013-09-18 15:53                   ` Paul Moore
  1 sibling, 1 reply; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-18  7:38 UTC (permalink / raw)
  To: Corey Bryant; +Cc: Paul Moore, qemu-devel, Eduardo Otubo

On Tue, Sep 17, 2013 at 03:17:28PM -0400, Corey Bryant wrote:
> 
> 
> On 09/17/2013 01:14 PM, Eduardo Otubo wrote:
> >
> >
> >On 09/17/2013 11:43 AM, Paul Moore wrote:
> >>On Tuesday, September 17, 2013 02:06:06 PM Daniel P. Berrange wrote:
> >>>On Tue, Sep 17, 2013 at 10:01:23AM -0300, Eduardo Otubo wrote:
> >>>
> >>>>Paul, what exactly are you planning to add to libvirt? I'm not a big
> >>>>fan of using qemu command line to pass syscalls for blacklist as
> >>>>arguments, but I can't see other way to avoid problems (like -net
> >>>>bridge / -net tap) from happening.
> >>
> >>At present, and as far as I'm concerned pretty much everything is open
> >>for
> >>discussion, the code works similar to the libvirt network filters.
> >>You create
> >>a separate XML configuration file which defines the filter and you
> >>reference
> >>that filter from the domain's XML configuration.  When a QEMU/KVM or
> >>LXC based
> >>domain starts it uses libseccomp to create the seccomp filter and then
> >>loads
> >>it into the kernel after the fork but before the domain is exec'd.
> >
> >Clever approach. I tihnk a possible way to do this is something like:
> >
> >  -sandbox
> >-on[,strict=<on|off>][,whitelist=qemu_whitelist.conf][,blacklist=qemu_blacklist.conf]
> >
> >
> >     where:
> >
> >[,whitelist=qemu_whitelist.conf] will override default whitelist filter
> >[,blacklist=blacklist.conf] will override default blacklist filter
> >
> >But when we add seccomp support for qemu on libvirt, we make sure to
> >just add -sandbox off and use Paul's approach.
> >
> >Is that a reasonable approach? What do you think?
> >
> 
> QEMU wouldn't require any changes for the approach Paul describes.
> The QEMU process that is exec'd by libvirt would be constrained by
> the filter that libvirt installed.

Libvirt does not want to be in the business of creating seccomp syscall
filters for QEMU. As mentioned before, IMHO that places an unacceptable
burden on libvirt to know about the syscalls each a particular version
of QEMU requires for its operation.

Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18  7:38                 ` Daniel P. Berrange
@ 2013-09-18 15:53                   ` Paul Moore
  2013-09-18 15:59                     ` Daniel P. Berrange
  0 siblings, 1 reply; 25+ messages in thread
From: Paul Moore @ 2013-09-18 15:53 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Corey Bryant, qemu-devel, Eduardo Otubo

On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
> Libvirt does not want to be in the business of creating seccomp syscall
> filters for QEMU. As mentioned before, IMHO that places an unacceptable
> burden on libvirt to know about the syscalls each a particular version
> of QEMU requires for its operation.

At a high level, I don't see how libvirt configuring and installing a syscall 
filter is substantially different from libvirt configuring and installing a 
network filter.

Also, and I recognize this is diverting away from a topic most of qemu-devel 
is not interested in, what about libvirt-lxc?  What about all of the other 
virtualization drivers supported by libvirt (granted, not all would be 
candidates for syscall filtering, but you get the idea).

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18 15:53                   ` Paul Moore
@ 2013-09-18 15:59                     ` Daniel P. Berrange
  2013-09-18 16:19                       ` Paul Moore
  0 siblings, 1 reply; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-18 15:59 UTC (permalink / raw)
  To: Paul Moore; +Cc: Corey Bryant, qemu-devel, Eduardo Otubo

On Wed, Sep 18, 2013 at 11:53:09AM -0400, Paul Moore wrote:
> On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
> > Libvirt does not want to be in the business of creating seccomp syscall
> > filters for QEMU. As mentioned before, IMHO that places an unacceptable
> > burden on libvirt to know about the syscalls each a particular version
> > of QEMU requires for its operation.
> 
> At a high level, I don't see how libvirt configuring and installing a syscall 
> filter is substantially different from libvirt configuring and installing a 
> network filter.

The rules created for a network filter have no bearing or relation to
internal QEMU implementation details, as you have with syscalls, so
this isn't really a relevant comparison.

> Also, and I recognize this is diverting away from a topic most of qemu-devel 
> is not interested in, what about libvirt-lxc?  What about all of the other 
> virtualization drivers supported by libvirt (granted, not all would be 
> candidates for syscall filtering, but you get the idea).

It isn't clear to me that syscall filtering is something that's relevant
for inclusion in libvirt-lxc. It seems like something that would be used
by apps running inside LXC containers directly. Libvirt has no knowledge
of such apps or what rules they might require, so can't make any kind of
intelligent decision about syscall filtering for LXC. I really view
seccomp as something that apps use directly themselves, not something
that a 3rd party process applies prior to launching the apps, since the
latter has far too much administrative burden IMHO.

Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18 15:59                     ` Daniel P. Berrange
@ 2013-09-18 16:19                       ` Paul Moore
  2013-09-18 16:32                         ` Daniel P. Berrange
  0 siblings, 1 reply; 25+ messages in thread
From: Paul Moore @ 2013-09-18 16:19 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Corey Bryant, qemu-devel, Eduardo Otubo

On Wednesday, September 18, 2013 04:59:10 PM Daniel P. Berrange wrote:
> On Wed, Sep 18, 2013 at 11:53:09AM -0400, Paul Moore wrote:
> > On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
> > > Libvirt does not want to be in the business of creating seccomp syscall
> > > filters for QEMU. As mentioned before, IMHO that places an unacceptable
> > > burden on libvirt to know about the syscalls each a particular version
> > > of QEMU requires for its operation.
> > 
> > At a high level, I don't see how libvirt configuring and installing a
> > syscall filter is substantially different from libvirt configuring and
> > installing a network filter.
> 
> The rules created for a network filter have no bearing or relation to
> internal QEMU implementation details, as you have with syscalls, so
> this isn't really a relevant comparison.

The rules created for a network filter are directly related to the details of 
the guest running inside of QEMU.  From a practical point of view I see both 
network and syscall filtering as being dependent on the guest; the network 
filtering configuration can change as the guest's services change, the syscall 
filtering configuration can change as the QEMU functionality can change.

> > Also, and I recognize this is diverting away from a topic most of
> > qemu-devel is not interested in, what about libvirt-lxc?  What about all
> > of the other virtualization drivers supported by libvirt (granted, not
> > all would be candidates for syscall filtering, but you get the idea).
> 
> It isn't clear to me that syscall filtering is something that's relevant
> for inclusion in libvirt-lxc. It seems like something that would be used
> by apps running inside LXC containers directly.

For all the same reasons that it makes sense to filter syscalls in QEMU, I 
think it makes sense to filter syscalls in libvirt-lxc.  The fundamental 
concern is that the kernel presents are large attack surface in the way of 
syscalls, and it is extremely likely that any given container does not have a 
legitimate need to call into all of the syscalls the kernel presents to 
userspace; especially if you consider the recent approaches of using 
containers to ship/deploy single applications.

Also, just in case there are some misconceptions floating around, loading a 
syscall filter in libvirt doesn't mean the individual container applications 
can't also load their own filter.  When multiple syscall filters are present 
for a given process, all of the filters are evaluated and the most restrictive 
decision for a given syscall request "wins".

> Libvirt has no knowledge of such apps or what rules they might require, so
> can't make any kind of intelligent decision about syscall filtering for LXC.

A perfectly valid point, but I also think of syscall filtering as allowing the 
host administrator the ability to reduce the attack surface of the host 
system/kernel from potentially malicious containers/applications without 
having to rely on these containers/applications to police themselves.

> I really view seccomp as something that apps use directly themselves, not
> something that a 3rd party process applies prior to launching the apps,
> since the latter has far too much administrative burden IMHO.

The seccomp filter functionality is definitely something that apps can use 
themselves, but to limit syscall filtering to just that use case is to miss 
out on other valid uses as well.  As far as the burden is concerned, is 
users/administrators find it too difficult, there is nothing requiring them to 
use it, however, for those who are facing serious security risks in their 
deployments providing syscall filtering in libvirt might be a very welcome 
addition.

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18 16:19                       ` Paul Moore
@ 2013-09-18 16:32                         ` Daniel P. Berrange
  2013-09-18 17:24                           ` Corey Bryant
  2013-09-18 17:37                           ` Paul Moore
  0 siblings, 2 replies; 25+ messages in thread
From: Daniel P. Berrange @ 2013-09-18 16:32 UTC (permalink / raw)
  To: Paul Moore; +Cc: Corey Bryant, qemu-devel, Eduardo Otubo

On Wed, Sep 18, 2013 at 12:19:44PM -0400, Paul Moore wrote:
> On Wednesday, September 18, 2013 04:59:10 PM Daniel P. Berrange wrote:
> > On Wed, Sep 18, 2013 at 11:53:09AM -0400, Paul Moore wrote:
> > > On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
> > > > Libvirt does not want to be in the business of creating seccomp syscall
> > > > filters for QEMU. As mentioned before, IMHO that places an unacceptable
> > > > burden on libvirt to know about the syscalls each a particular version
> > > > of QEMU requires for its operation.
> > > 
> > > At a high level, I don't see how libvirt configuring and installing a
> > > syscall filter is substantially different from libvirt configuring and
> > > installing a network filter.
> > 
> > The rules created for a network filter have no bearing or relation to
> > internal QEMU implementation details, as you have with syscalls, so
> > this isn't really a relevant comparison.
> 
> The rules created for a network filter are directly related to the details of 
> the guest running inside of QEMU.  From a practical point of view I see both 
> network and syscall filtering as being dependent on the guest; the network 
> filtering configuration can change as the guest's services change, the syscall 
> filtering configuration can change as the QEMU functionality can change.

You're talking about two very different things here. Seccomp syscall
filtering affects QEMU itself, while network filter affects the guest
OS apps inside QEMU. Network filtering still does not depend on the
implementation details of the guest OS apps - it depends on the services
that those apps are using. Thus configuring network filters does not
require the admin to have knowledge of the apps internal impl details
in the way that seccomp does.

> > > Also, and I recognize this is diverting away from a topic most of
> > > qemu-devel is not interested in, what about libvirt-lxc?  What about all
> > > of the other virtualization drivers supported by libvirt (granted, not
> > > all would be candidates for syscall filtering, but you get the idea).
> > 
> > It isn't clear to me that syscall filtering is something that's relevant
> > for inclusion in libvirt-lxc. It seems like something that would be used
> > by apps running inside LXC containers directly.
> 
> For all the same reasons that it makes sense to filter syscalls in QEMU, I 
> think it makes sense to filter syscalls in libvirt-lxc.  The fundamental 
> concern is that the kernel presents are large attack surface in the way of 
> syscalls, and it is extremely likely that any given container does not have a 
> legitimate need to call into all of the syscalls the kernel presents to 
> userspace; especially if you consider the recent approaches of using 
> containers to ship/deploy single applications.
>
> Also, just in case there are some misconceptions floating around, loading a 
> syscall filter in libvirt doesn't mean the individual container applications 
> can't also load their own filter.  When multiple syscall filters are present 
> for a given process, all of the filters are evaluated and the most restrictive 
> decision for a given syscall request "wins".
> 
> > Libvirt has no knowledge of such apps or what rules they might require, so
> > can't make any kind of intelligent decision about syscall filtering for LXC.
> 
> A perfectly valid point, but I also think of syscall filtering as allowing the 
> host administrator the ability to reduce the attack surface of the host 
> system/kernel from potentially malicious containers/applications without 
> having to rely on these containers/applications to police themselves.
> 
> > I really view seccomp as something that apps use directly themselves, not
> > something that a 3rd party process applies prior to launching the apps,
> > since the latter has far too much administrative burden IMHO.
> 
> The seccomp filter functionality is definitely something that apps can use 
> themselves, but to limit syscall filtering to just that use case is to miss 
> out on other valid uses as well.  As far as the burden is concerned, is 
> users/administrators find it too difficult, there is nothing requiring them to 
> use it, however, for those who are facing serious security risks in their 
> deployments providing syscall filtering in libvirt might be a very welcome 
> addition.

I'm not debating the usefulness of secomp technology, I just really don't
see it as something that is practical or sensible to encourage end users/
admins to make use of. It is hard enough for app developers themselves to
make use of it properly and they have a tonne of domain knowledge about
the internals of their application implementation. When you have uninformed
users/admins using it by trial and error I just see a support disaster
coming straight at us. That small minority who  really are skilful enough
to use it can still do so by launching the app in question via a 'runseccomp'
like too which would just install a filter & then exec the real binary.

Regards,
Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18 16:32                         ` Daniel P. Berrange
@ 2013-09-18 17:24                           ` Corey Bryant
  2013-09-18 17:37                           ` Paul Moore
  1 sibling, 0 replies; 25+ messages in thread
From: Corey Bryant @ 2013-09-18 17:24 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Paul Moore, qemu-devel, Eduardo Otubo



On 09/18/2013 12:32 PM, Daniel P. Berrange wrote:
> On Wed, Sep 18, 2013 at 12:19:44PM -0400, Paul Moore wrote:
>> On Wednesday, September 18, 2013 04:59:10 PM Daniel P. Berrange wrote:
>>> On Wed, Sep 18, 2013 at 11:53:09AM -0400, Paul Moore wrote:
>>>> On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
>>>>> Libvirt does not want to be in the business of creating seccomp syscall
>>>>> filters for QEMU. As mentioned before, IMHO that places an unacceptable
>>>>> burden on libvirt to know about the syscalls each a particular version
>>>>> of QEMU requires for its operation.
>>>>
>>>> At a high level, I don't see how libvirt configuring and installing a
>>>> syscall filter is substantially different from libvirt configuring and
>>>> installing a network filter.
>>>
>>> The rules created for a network filter have no bearing or relation to
>>> internal QEMU implementation details, as you have with syscalls, so
>>> this isn't really a relevant comparison.
>>
>> The rules created for a network filter are directly related to the details of
>> the guest running inside of QEMU.  From a practical point of view I see both
>> network and syscall filtering as being dependent on the guest; the network
>> filtering configuration can change as the guest's services change, the syscall
>> filtering configuration can change as the QEMU functionality can change.
>
> You're talking about two very different things here. Seccomp syscall
> filtering affects QEMU itself, while network filter affects the guest
> OS apps inside QEMU. Network filtering still does not depend on the
> implementation details of the guest OS apps - it depends on the services
> that those apps are using. Thus configuring network filters does not
> require the admin to have knowledge of the apps internal impl details
> in the way that seccomp does.
>
>>>> Also, and I recognize this is diverting away from a topic most of
>>>> qemu-devel is not interested in, what about libvirt-lxc?  What about all
>>>> of the other virtualization drivers supported by libvirt (granted, not
>>>> all would be candidates for syscall filtering, but you get the idea).
>>>
>>> It isn't clear to me that syscall filtering is something that's relevant
>>> for inclusion in libvirt-lxc. It seems like something that would be used
>>> by apps running inside LXC containers directly.
>>
>> For all the same reasons that it makes sense to filter syscalls in QEMU, I
>> think it makes sense to filter syscalls in libvirt-lxc.  The fundamental
>> concern is that the kernel presents are large attack surface in the way of
>> syscalls, and it is extremely likely that any given container does not have a
>> legitimate need to call into all of the syscalls the kernel presents to
>> userspace; especially if you consider the recent approaches of using
>> containers to ship/deploy single applications.
>>
>> Also, just in case there are some misconceptions floating around, loading a
>> syscall filter in libvirt doesn't mean the individual container applications
>> can't also load their own filter.  When multiple syscall filters are present
>> for a given process, all of the filters are evaluated and the most restrictive
>> decision for a given syscall request "wins".
>>
>>> Libvirt has no knowledge of such apps or what rules they might require, so
>>> can't make any kind of intelligent decision about syscall filtering for LXC.
>>
>> A perfectly valid point, but I also think of syscall filtering as allowing the
>> host administrator the ability to reduce the attack surface of the host
>> system/kernel from potentially malicious containers/applications without
>> having to rely on these containers/applications to police themselves.
>>
>>> I really view seccomp as something that apps use directly themselves, not
>>> something that a 3rd party process applies prior to launching the apps,
>>> since the latter has far too much administrative burden IMHO.
>>
>> The seccomp filter functionality is definitely something that apps can use
>> themselves, but to limit syscall filtering to just that use case is to miss
>> out on other valid uses as well.  As far as the burden is concerned, is
>> users/administrators find it too difficult, there is nothing requiring them to
>> use it, however, for those who are facing serious security risks in their
>> deployments providing syscall filtering in libvirt might be a very welcome
>> addition.
>
> I'm not debating the usefulness of secomp technology, I just really don't
> see it as something that is practical or sensible to encourage end users/
> admins to make use of. It is hard enough for app developers themselves to
> make use of it properly and they have a tonne of domain knowledge about
> the internals of their application implementation. When you have uninformed
> users/admins using it by trial and error I just see a support disaster
> coming straight at us. That small minority who  really are skilful enough
> to use it can still do so by launching the app in question via a 'runseccomp'
> like too which would just install a filter & then exec the real binary.
>
> Regards,
> Daniel
>

An added benefit of allowing an admin to configure a seccomp filter is 
that they could potentially "patch" a vulnerability before an actual fix 
is available, by blacklisting a syscall or a syscall argument.

-- 
Regards,
Corey Bryant

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist
  2013-09-18 16:32                         ` Daniel P. Berrange
  2013-09-18 17:24                           ` Corey Bryant
@ 2013-09-18 17:37                           ` Paul Moore
  1 sibling, 0 replies; 25+ messages in thread
From: Paul Moore @ 2013-09-18 17:37 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: Corey Bryant, qemu-devel, Eduardo Otubo

On Wednesday, September 18, 2013 05:32:17 PM Daniel P. Berrange wrote:
> On Wed, Sep 18, 2013 at 12:19:44PM -0400, Paul Moore wrote:
> > On Wednesday, September 18, 2013 04:59:10 PM Daniel P. Berrange wrote:
> > > On Wed, Sep 18, 2013 at 11:53:09AM -0400, Paul Moore wrote:
> > > > On Wednesday, September 18, 2013 08:38:17 AM Daniel P. Berrange wrote:
> > > > > Libvirt does not want to be in the business of creating seccomp
> > > > > syscall filters for QEMU. As mentioned before, IMHO that places an
> > > > > unacceptable burden on libvirt to know about the syscalls each a
> > > > > particular version of QEMU requires for its operation.
> > > > 
> > > > At a high level, I don't see how libvirt configuring and installing a
> > > > syscall filter is substantially different from libvirt configuring and
> > > > installing a network filter.
> > > 
> > > The rules created for a network filter have no bearing or relation to
> > > internal QEMU implementation details, as you have with syscalls, so
> > > this isn't really a relevant comparison.
> > 
> > The rules created for a network filter are directly related to the details
> > of the guest running inside of QEMU.  From a practical point of view I
> > see both network and syscall filtering as being dependent on the guest;
> > the network filtering configuration can change as the guest's services
> > change, the syscall filtering configuration can change as the QEMU
> > functionality can change.
>
> You're talking about two very different things here. Seccomp syscall
> filtering affects QEMU itself while network filter affects the guest
> OS apps inside QEMU.

>From a security standpoint I'm not entirely convinced the distinction is 
important.

> Network filtering still does not depend on the implementation details of the
> guest OS apps - it depends on the services that those apps are using.

Once again, I'm not entirely sure that worrying about the distinction between 
guest apps/services is important - it is just the "guest".

> Thus configuring network filters does not require the admin to have
> knowledge of the apps internal impl details in the way that seccomp does.

Network filters require the admin to have knowledge of what apps/services the 
guest is providing.  Syscall filters require the admin have knowledge of what 
version of QEMU is deployed on the host.  I think it is reasonable to expect 
that the admin has more knowledge, and more control, over the QEMU version 
they are using than they do over what is being run in the hosted guests.

I don't argue that arriving at the correct syscall filter configuration is 
more difficult than a network filter, but I don't see what that means we can't 
offer it as an option for the more savy admins.  Also, the libvirt patches I'm 
currently working on allow the syscall filter to be defined either as a 
whitelist or a blacklist; the blacklist approach should provide a much more 
gradual learning curve ... and in the case of containers, I suspect it might 
also be the better solution.

> > > > Also, and I recognize this is diverting away from a topic most of
> > > > qemu-devel is not interested in, what about libvirt-lxc?  What about
> > > > all of the other virtualization drivers supported by libvirt (granted,
> > > > not all would be candidates for syscall filtering, but you get the
> > > > idea).
> > > 
> > > It isn't clear to me that syscall filtering is something that's relevant
> > > for inclusion in libvirt-lxc. It seems like something that would be used
> > > by apps running inside LXC containers directly.
> > 
> > For all the same reasons that it makes sense to filter syscalls in QEMU, I
> > think it makes sense to filter syscalls in libvirt-lxc.  The fundamental
> > concern is that the kernel presents are large attack surface in the way of
> > syscalls, and it is extremely likely that any given container does not
> > have a legitimate need to call into all of the syscalls the kernel
> > presents to userspace; especially if you consider the recent approaches
> > of using containers to ship/deploy single applications.
> > 
> > Also, just in case there are some misconceptions floating around, loading
> > a syscall filter in libvirt doesn't mean the individual container
> > applications can't also load their own filter.  When multiple syscall
> > filters are present for a given process, all of the filters are evaluated
> > and the most restrictive decision for a given syscall request "wins".
> > 
> > > Libvirt has no knowledge of such apps or what rules they might require,
> > > so can't make any kind of intelligent decision about syscall filtering
> > > for LXC.
> >
> > A perfectly valid point, but I also think of syscall filtering as allowing
> > the host administrator the ability to reduce the attack surface of the
> > host system/kernel from potentially malicious containers/applications
> > without having to rely on these containers/applications to police
> > themselves.
> > 
> > > I really view seccomp as something that apps use directly themselves,
> > > not something that a 3rd party process applies prior to launching the
> > > apps, since the latter has far too much administrative burden IMHO.
> > 
> > The seccomp filter functionality is definitely something that apps can use
> > themselves, but to limit syscall filtering to just that use case is to
> > miss out on other valid uses as well.  As far as the burden is concerned,
> > is users/administrators find it too difficult, there is nothing requiring
> > them to use it, however, for those who are facing serious security risks
> > in their deployments providing syscall filtering in libvirt might be a
> > very welcome addition.
> 
> I'm not debating the usefulness of secomp technology, I just really don't
> see it as something that is practical or sensible to encourage end users/
> admins to make use of.

I think providing support in libvirt and encouraging every libvirt user to 
make use of it are two different things.

> It is hard enough for app developers themselves to make use of it properly
> and they have a tonne of domain knowledge about the internals of their
> application implementation. When you have uninformed users/admins using it
> by trial and error I just see a support disaster coming straight at us.

Once again, I'm not sure adding support for the functionality means we have to 
advocate that every user enable it on their systems.  I would recommend that 
we document the functionality well, like the rest of libvirt, and make sure 
the documentation is very clear about the care needed in crafting the filters, 
perhaps even documenting some troubleshooting/discovery approaches as well as 
the use of blacklists over whitelists as a starting point.

> That small minority who really are skilful enough to use it can still do so
> by launching the app in question via a 'runseccomp' like too which would
> just install a filter & then exec the real binary.

Does libvirt currently have a mechanism to execute such a helper program 
before a container is exec'd?  What about QEMU and the rest of the libvirt 
supported virt drivers?

-- 
paul moore
security and virtualization @ redhat

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes
  2013-09-11 16:56   ` Corey Bryant
@ 2013-10-09  0:40     ` Eduardo Otubo
  0 siblings, 0 replies; 25+ messages in thread
From: Eduardo Otubo @ 2013-10-09  0:40 UTC (permalink / raw)
  To: Corey Bryant; +Cc: pmoore, qemu-devel



On 09/11/2013 01:56 PM, Corey Bryant wrote:
>
>
> On 09/06/2013 03:21 PM, Eduardo Otubo wrote:
>>   1) On qemu-seccomp.c:255, the variable ctx was being used
>> uninitialized; now it's initialized with NULL and it's being checked at
>> the end of the function.
>>
>>   2) Changed the name of the command line option from "enable" to
>> "sandbox" for a better understanding from user side.
>>
>> Signed-off-by: Eduardo Otubo<otubo@linux.vnet.ibm.com>
>> ---
>>   qemu-seccomp.c | 5 +++--
>>   vl.c           | 6 +++---
>>   2 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/qemu-seccomp.c b/qemu-seccomp.c
>> index 5e85eb5..f39d636 100644
>> --- a/qemu-seccomp.c
>> +++ b/qemu-seccomp.c
>> @@ -252,7 +252,7 @@ seccomp_return:
>>   int seccomp_start(int list_type)
>>   {
>>       int rc = 0;
>> -    scmp_filter_ctx ctx;
>> +    scmp_filter_ctx ctx = NULL;
>>
>>       switch (list_type) {
>>       case WHITELIST:
>> @@ -280,6 +280,7 @@ int seccomp_start(int list_type)
>>       rc = seccomp_load(ctx);
>>
>>   seccomp_return:
>> -    seccomp_release(ctx);
>> +    if (!ctx)
>
> You need to remove the ! from this check.
>
>> +        seccomp_release(ctx);
>>       return rc;
>>   }
>> diff --git a/vl.c b/vl.c
>> index 909f685..129919d 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -323,11 +323,11 @@ static QemuOptsList qemu_rtc_opts = {
>>
>>   static QemuOptsList qemu_sandbox_opts = {
>>       .name = "sandbox",
>> -    .implied_opt_name = "enable",
>> +    .implied_opt_name = "sandbox",
>
> So does this technically make it -sandbox,sandbox=on?If I understand

No. Qemu command line options is a little tricky and I had to spent some 
time to understand it. It actually make "-sandbox on,strict=on"

> correctly, I don't think the implied option is ever seen or used by the
> user anyway so it probably doesn't matter.  But I don't know if it's
> worth changing.

I changed the name so I can remember how it works in the future, since 
it's not that trivial.

>
>>       .head = QTAILQ_HEAD_INITIALIZER(qemu_sandbox_opts.head),
>>       .desc = {
>>           {
>> -            .name = "enable",
>> +            .name = "sandbox",
>>               .type = QEMU_OPT_BOOL,
>>           },{
>>               .name = "strict",
>> @@ -1036,7 +1036,7 @@ static int parse_sandbox(QemuOpts *opts, void
>> *opaque)
>>   {
>>       const char * strict_value = NULL;
>>       /* FIXME: change this to true for 1.3 */
>> -    if (qemu_opt_get_bool(opts, "enable", false)) {
>> +    if (qemu_opt_get_bool(opts, "sandbox", false)) {
>>   #ifdef CONFIG_SECCOMP
>>           if (seccomp_start(WHITELIST) < 0) {
>>               qerror_report(ERROR_CLASS_GENERIC_ERROR,
>> -- 1.8.3.1
>>
>

-- 
Eduardo Otubo
IBM Linux Technology Center

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2013-10-09  0:41 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-06 19:21 [Qemu-devel] [PATCHv2 0/3] seccomp: adding blacklist support with command line Eduardo Otubo
2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 1/3] seccomp: adding blacklist support Eduardo Otubo
2013-09-09  3:49   ` Lei Li
2013-09-11 16:29   ` Corey Bryant
2013-09-06 19:21 ` [Qemu-devel] [PATCHv2 2/3] seccomp: adding command line support for blacklist Eduardo Otubo
2013-09-11 16:45   ` Corey Bryant
2013-09-11 16:49     ` Daniel P. Berrange
2013-09-17 13:01       ` Eduardo Otubo
2013-09-17 13:06         ` Daniel P. Berrange
2013-09-17 14:43           ` Paul Moore
2013-09-17 17:14             ` Eduardo Otubo
2013-09-17 18:08               ` Eduardo Otubo
2013-09-17 19:17               ` Corey Bryant
2013-09-17 20:16                 ` Eduardo Otubo
2013-09-18  7:38                 ` Daniel P. Berrange
2013-09-18 15:53                   ` Paul Moore
2013-09-18 15:59                     ` Daniel P. Berrange
2013-09-18 16:19                       ` Paul Moore
2013-09-18 16:32                         ` Daniel P. Berrange
2013-09-18 17:24                           ` Corey Bryant
2013-09-18 17:37                           ` Paul Moore
2013-09-18  7:35               ` Daniel P. Berrange
2013-09-06 19:21 ` [Qemu-devel] [PATCHv3 3/3] seccomp: general fixes Eduardo Otubo
2013-09-11 16:56   ` Corey Bryant
2013-10-09  0:40     ` Eduardo Otubo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.