All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: virtio-fs@redhat.com, rmohr@redhat.com,
	"Dr. David Alan Gilbert" <dgilbert@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	vromanso@redhat.com
Subject: [PATCH for-5.1 2/3] virtiofsd: add container-friendly -o chroot sandboxing option
Date: Wed, 22 Jul 2020 14:02:05 +0100	[thread overview]
Message-ID: <20200722130206.224898-3-stefanha@redhat.com> (raw)
In-Reply-To: <20200722130206.224898-1-stefanha@redhat.com>

virtiofsd cannot run in an unprivileged container because CAP_SYS_ADMIN
is required to create namespaces.

Introduce a weaker sandbox that is sufficient in container environments
because the container runtime already sets up namespaces. Use chroot to
restrict path traversal to the shared directory.

virtiofsd loses the following:

1. Mount namespace. The process chroots to the shared directory but
   leaves the mounts in place. Seccomp rejects mount(2)/umount(2)
   syscalls.

2. Pid namespace. This should be fine because virtiofsd is the only
   process running in the container.

3. Network namespace. This should be fine because seccomp already
   rejects the connect(2) syscall, but an additional layer of security
   is lost. Container runtime-specific network security policies can be
   used drop network traffic (except for the vhost-user UNIX domain
   socket).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tools/virtiofsd/helper.c         |  3 +++
 tools/virtiofsd/passthrough_ll.c | 44 ++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 3105b6c23a..7421c9ca1a 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -151,6 +151,9 @@ void fuse_cmdline_help(void)
            "    -o cache=<mode>            cache mode. could be one of \"auto, "
            "always, none\"\n"
            "                               default: auto\n"
+           "    -o chroot|no_chroot        use container-friendly chroot instead\n"
+           "                               of stronger mount namespace sandbox\n"
+           "                               default: false\n"
            "    -o flock|no_flock          enable/disable flock\n"
            "                               default: no_flock\n"
            "    -o log_level=<level>       log level, default to \"info\"\n"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 50a164a599..990c0a8a70 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -139,6 +139,7 @@ enum {
 
 struct lo_data {
     pthread_mutex_t mutex;
+    int chroot; /* 1 - use chroot, 0 - use mount namespace */
     int debug;
     int writeback;
     int flock;
@@ -162,6 +163,8 @@ struct lo_data {
 };
 
 static const struct fuse_opt lo_opts[] = {
+    { "chroot", offsetof(struct lo_data, chroot), 1 },
+    { "no_chroot", offsetof(struct lo_data, chroot), 0 },
     { "writeback", offsetof(struct lo_data, writeback), 1 },
     { "no_writeback", offsetof(struct lo_data, writeback), 0 },
     { "source=%s", offsetof(struct lo_data, source), 0 },
@@ -2665,6 +2668,37 @@ static void setup_capabilities(char *modcaps_in)
     pthread_mutex_unlock(&cap.mutex);
 }
 
+/*
+ * Use chroot as a weaker sandbox for environment where the process is launched
+ * without CAP_SYS_ADMIN.
+ */
+static void setup_chroot(struct lo_data *lo)
+{
+    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+    if (lo->proc_self_fd == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n");
+        exit(1);
+    }
+
+    /*
+     * Make the shared directory the file system root so that FUSE_OPEN
+     * (lo_open()) cannot escape the shared directory by opening a symlink.
+     *
+     * It's still possible to escape the chroot via lo->proc_self_fd but that
+     * requires gaining control of the process first.
+     */
+    if (chroot(lo->source) != 0) {
+        fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source);
+        exit(1);
+    }
+
+    /* Move into the chroot */
+    if (chdir("/") != 0) {
+        fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n");
+        exit(1);
+    }
+}
+
 /*
  * Lock down this process to prevent access to other processes or files outside
  * source directory.  This reduces the impact of arbitrary code execution bugs.
@@ -2672,8 +2706,13 @@ static void setup_capabilities(char *modcaps_in)
 static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
                           bool enable_syslog)
 {
-    setup_namespaces(lo, se);
-    setup_mounts(lo->source);
+    if (lo->chroot) {
+        setup_chroot(lo);
+    } else {
+        setup_namespaces(lo, se);
+        setup_mounts(lo->source);
+    }
+
     setup_seccomp(enable_syslog);
     setup_capabilities(g_strdup(lo->modcaps));
 }
@@ -2820,6 +2859,7 @@ int main(int argc, char *argv[])
     struct fuse_session *se;
     struct fuse_cmdline_opts opts;
     struct lo_data lo = {
+        .chroot = 0,
         .debug = 0,
         .writeback = 0,
         .posix_lock = 1,
-- 
2.26.2


WARNING: multiple messages have this Message-ID (diff)
From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: virtio-fs@redhat.com, rmohr@redhat.com, vromanso@redhat.com
Subject: [Virtio-fs] [PATCH for-5.1 2/3] virtiofsd: add container-friendly -o chroot sandboxing option
Date: Wed, 22 Jul 2020 14:02:05 +0100	[thread overview]
Message-ID: <20200722130206.224898-3-stefanha@redhat.com> (raw)
In-Reply-To: <20200722130206.224898-1-stefanha@redhat.com>

virtiofsd cannot run in an unprivileged container because CAP_SYS_ADMIN
is required to create namespaces.

Introduce a weaker sandbox that is sufficient in container environments
because the container runtime already sets up namespaces. Use chroot to
restrict path traversal to the shared directory.

virtiofsd loses the following:

1. Mount namespace. The process chroots to the shared directory but
   leaves the mounts in place. Seccomp rejects mount(2)/umount(2)
   syscalls.

2. Pid namespace. This should be fine because virtiofsd is the only
   process running in the container.

3. Network namespace. This should be fine because seccomp already
   rejects the connect(2) syscall, but an additional layer of security
   is lost. Container runtime-specific network security policies can be
   used drop network traffic (except for the vhost-user UNIX domain
   socket).

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tools/virtiofsd/helper.c         |  3 +++
 tools/virtiofsd/passthrough_ll.c | 44 ++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 3105b6c23a..7421c9ca1a 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -151,6 +151,9 @@ void fuse_cmdline_help(void)
            "    -o cache=<mode>            cache mode. could be one of \"auto, "
            "always, none\"\n"
            "                               default: auto\n"
+           "    -o chroot|no_chroot        use container-friendly chroot instead\n"
+           "                               of stronger mount namespace sandbox\n"
+           "                               default: false\n"
            "    -o flock|no_flock          enable/disable flock\n"
            "                               default: no_flock\n"
            "    -o log_level=<level>       log level, default to \"info\"\n"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 50a164a599..990c0a8a70 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -139,6 +139,7 @@ enum {
 
 struct lo_data {
     pthread_mutex_t mutex;
+    int chroot; /* 1 - use chroot, 0 - use mount namespace */
     int debug;
     int writeback;
     int flock;
@@ -162,6 +163,8 @@ struct lo_data {
 };
 
 static const struct fuse_opt lo_opts[] = {
+    { "chroot", offsetof(struct lo_data, chroot), 1 },
+    { "no_chroot", offsetof(struct lo_data, chroot), 0 },
     { "writeback", offsetof(struct lo_data, writeback), 1 },
     { "no_writeback", offsetof(struct lo_data, writeback), 0 },
     { "source=%s", offsetof(struct lo_data, source), 0 },
@@ -2665,6 +2668,37 @@ static void setup_capabilities(char *modcaps_in)
     pthread_mutex_unlock(&cap.mutex);
 }
 
+/*
+ * Use chroot as a weaker sandbox for environment where the process is launched
+ * without CAP_SYS_ADMIN.
+ */
+static void setup_chroot(struct lo_data *lo)
+{
+    lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+    if (lo->proc_self_fd == -1) {
+        fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n");
+        exit(1);
+    }
+
+    /*
+     * Make the shared directory the file system root so that FUSE_OPEN
+     * (lo_open()) cannot escape the shared directory by opening a symlink.
+     *
+     * It's still possible to escape the chroot via lo->proc_self_fd but that
+     * requires gaining control of the process first.
+     */
+    if (chroot(lo->source) != 0) {
+        fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source);
+        exit(1);
+    }
+
+    /* Move into the chroot */
+    if (chdir("/") != 0) {
+        fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n");
+        exit(1);
+    }
+}
+
 /*
  * Lock down this process to prevent access to other processes or files outside
  * source directory.  This reduces the impact of arbitrary code execution bugs.
@@ -2672,8 +2706,13 @@ static void setup_capabilities(char *modcaps_in)
 static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
                           bool enable_syslog)
 {
-    setup_namespaces(lo, se);
-    setup_mounts(lo->source);
+    if (lo->chroot) {
+        setup_chroot(lo);
+    } else {
+        setup_namespaces(lo, se);
+        setup_mounts(lo->source);
+    }
+
     setup_seccomp(enable_syslog);
     setup_capabilities(g_strdup(lo->modcaps));
 }
@@ -2820,6 +2859,7 @@ int main(int argc, char *argv[])
     struct fuse_session *se;
     struct fuse_cmdline_opts opts;
     struct lo_data lo = {
+        .chroot = 0,
         .debug = 0,
         .writeback = 0,
         .posix_lock = 1,
-- 
2.26.2


  parent reply	other threads:[~2020-07-22 13:04 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-22 13:02 [PATCH for-5.1 0/3] virtiofsd: allow virtiofsd to run in a container Stefan Hajnoczi
2020-07-22 13:02 ` [Virtio-fs] " Stefan Hajnoczi
2020-07-22 13:02 ` [PATCH for-5.1 1/3] virtiofsd: drop CAP_DAC_READ_SEARCH Stefan Hajnoczi
2020-07-22 13:02   ` [Virtio-fs] " Stefan Hajnoczi
2020-07-22 16:51   ` Dr. David Alan Gilbert
2020-07-22 16:51     ` [Virtio-fs] " Dr. David Alan Gilbert
2020-07-22 13:02 ` Stefan Hajnoczi [this message]
2020-07-22 13:02   ` [Virtio-fs] [PATCH for-5.1 2/3] virtiofsd: add container-friendly -o chroot sandboxing option Stefan Hajnoczi
2020-07-22 16:58   ` Daniel P. Berrangé
2020-07-22 16:58     ` [Virtio-fs] " Daniel P. Berrangé
2020-07-23 12:17     ` Stefan Hajnoczi
2020-07-23 12:17       ` [Virtio-fs] " Stefan Hajnoczi
2020-07-22 17:58   ` Dr. David Alan Gilbert
2020-07-22 17:58     ` [Virtio-fs] " Dr. David Alan Gilbert
2020-07-23 12:28     ` Stefan Hajnoczi
2020-07-23 12:28       ` [Virtio-fs] " Stefan Hajnoczi
2020-07-23 13:47       ` Vivek Goyal
2020-07-23 13:47         ` Vivek Goyal
2020-07-23 15:36         ` Stefan Hajnoczi
2020-07-23 15:36           ` Stefan Hajnoczi
2020-07-22 18:17   ` Vivek Goyal
2020-07-23 12:29     ` Stefan Hajnoczi
2020-07-22 19:03   ` Dr. David Alan Gilbert
2020-07-22 19:03     ` [Virtio-fs] " Dr. David Alan Gilbert
2020-07-23 12:32     ` Stefan Hajnoczi
2020-07-23 12:32       ` [Virtio-fs] " Stefan Hajnoczi
2020-07-23 17:55       ` Dr. David Alan Gilbert
2020-07-23 17:55         ` [Virtio-fs] " Dr. David Alan Gilbert
2020-07-24 12:22         ` Stefan Hajnoczi
2020-07-24 12:22           ` [Virtio-fs] " Stefan Hajnoczi
2020-07-22 13:02 ` [PATCH for-5.1 3/3] virtiofsd: probe unshare(CLONE_FS) and print an error Stefan Hajnoczi
2020-07-22 13:02   ` [Virtio-fs] " Stefan Hajnoczi
2020-07-22 17:03   ` Daniel P. Berrangé
2020-07-22 17:03     ` [Virtio-fs] " Daniel P. Berrangé
2020-07-23 12:46     ` Stefan Hajnoczi
2020-07-23 12:46       ` [Virtio-fs] " Stefan Hajnoczi
2020-07-23 12:50       ` Daniel P. Berrangé
2020-07-23 12:50         ` [Virtio-fs] " Daniel P. Berrangé
2020-07-23 13:56         ` Vivek Goyal
2020-07-23 13:56           ` Vivek Goyal
2020-07-23 15:19           ` Stefan Hajnoczi
2020-07-22 18:19 ` [Virtio-fs] [PATCH for-5.1 0/3] virtiofsd: allow virtiofsd to run in a container Vivek Goyal
2020-07-23 12:46   ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200722130206.224898-3-stefanha@redhat.com \
    --to=stefanha@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rmohr@redhat.com \
    --cc=virtio-fs@redhat.com \
    --cc=vromanso@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.