All of lore.kernel.org
 help / color / mirror / Atom feed
From: Max Reitz <mreitz@redhat.com>
To: qemu-devel@nongnu.org, virtio-fs@redhat.com
Cc: Stefan Hajnoczi <stefanha@redhat.com>,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	Vivek Goyal <vgoyal@redhat.com>, Max Reitz <mreitz@redhat.com>
Subject: [PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle
Date: Fri, 30 Jul 2021 17:01:31 +0200	[thread overview]
Message-ID: <20210730150134.216126-8-mreitz@redhat.com> (raw)
In-Reply-To: <20210730150134.216126-1-mreitz@redhat.com>

This new field is an alternative to lo_inode.fd: one of the two must
always be set.  When an O_PATH FD is needed for some lo_inode, it is
either taken from lo_inode.fd, if valid, or a temporary FD is opened
with open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c      | 116 ++++++++++++++++++++++----
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 2 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 292b7f7e27..487448d666 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@ struct lo_key {
     uint64_t mnt_id;
 };
 
+struct lo_fhandle {
+    union {
+        struct file_handle handle;
+        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+    };
+    int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 struct lo_inode {
+    /*
+     * Either of fd or fhandle must be set (i.e. >= 0 or non-NULL,
+     * respectively).
+     */
     int fd;
+    struct lo_fhandle *fhandle;
 
     /*
      * Atomic reference count for this object.  The nlookup field holds a
@@ -302,6 +319,44 @@ static int temp_fd_steal(TempFd *temp_fd)
     }
 }
 
+/**
+ * Open the given file handle with the given flags.
+ *
+ * The mount FD to pass to open_by_handle_at() is taken from the
+ * mount_fds hash map.
+ *
+ * On error, return -errno.
+ */
+static int open_file_handle(const struct lo_fhandle *fh, int flags)
+{
+    gpointer mount_fd_ptr;
+    int mount_fd;
+    bool found;
+    int ret;
+
+    ret = pthread_rwlock_rdlock(&mount_fds_lock);
+    if (ret) {
+        return -ret;
+    }
+
+    /* mount_fd == 0 is valid, so we need lookup_extended */
+    found = g_hash_table_lookup_extended(mount_fds,
+                                         GINT_TO_POINTER(fh->mount_id),
+                                         NULL, &mount_fd_ptr);
+    pthread_rwlock_unlock(&mount_fds_lock);
+    if (!found) {
+        return -EINVAL;
+    }
+    mount_fd = GPOINTER_TO_INT(mount_fd_ptr);
+
+    ret = open_by_handle_at(mount_fd, (struct file_handle *)&fh->handle, flags);
+    if (ret < 0) {
+        return -errno;
+    }
+
+    return ret;
+}
+
 /*
  * Load capng's state from our saved state if the current thread
  * hadn't previously been loaded.
@@ -608,7 +663,11 @@ static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
     *inodep = NULL;
 
     if (g_atomic_int_dec_and_test(&inode->refcount)) {
-        close(inode->fd);
+        if (inode->fd >= 0) {
+            close(inode->fd);
+        } else {
+            g_free(inode->fhandle);
+        }
         free(inode);
     }
 }
@@ -635,10 +694,25 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 
 static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
 {
-    *tfd = (TempFd) {
-        .fd = inode->fd,
-        .owned = false,
-    };
+    if (inode->fd >= 0) {
+        *tfd = (TempFd) {
+            .fd = inode->fd,
+            .owned = false,
+        };
+    } else {
+        int fd;
+
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, O_PATH);
+        if (fd < 0) {
+            return -errno;
+        }
+
+        *tfd = (TempFd) {
+            .fd = fd,
+            .owned = true,
+        };
+    }
 
     return 0;
 }
@@ -678,22 +752,32 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd *tfd)
 static int lo_inode_open(const struct lo_data *lo, const struct lo_inode *inode,
                          int open_flags, TempFd *tfd)
 {
-    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+    g_autofree char *fd_str = NULL;
     int fd;
 
     if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
         return -EBADF;
     }
 
-    /*
-     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
-     * that the inode is not a special file but if an external process races
-     * with us then symlinks are traversed here. It is not possible to escape
-     * the shared directory since it is mounted as "/" though.
-     */
-    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-    if (fd < 0) {
-        return -errno;
+    if (inode->fd >= 0) {
+        /*
+         * The file is a symlink so O_NOFOLLOW must be ignored. We checked
+         * earlier that the inode is not a special file but if an external
+         * process races with us then symlinks are traversed here. It is not
+         * possible to escape the shared directory since it is mounted as "/"
+         * though.
+         */
+        fd_str = g_strdup_printf("%d", inode->fd);
+        fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
+        if (fd < 0) {
+            return -errno;
+        }
+    } else {
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, open_flags);
+        if (fd < 0) {
+            return fd;
+        }
     }
 
     *tfd = (TempFd) {
@@ -4110,6 +4194,8 @@ int main(int argc, char *argv[])
     lo.root.fuse_ino = FUSE_ROOT_ID;
     lo.cache = CACHE_AUTO;
 
+    mount_fds = g_hash_table_new(NULL, NULL);
+
     /*
      * Set up the ino map like this:
      * [0] Reserved (will not be used)
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index f49ed94b5e..af04c638cb 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -77,6 +77,7 @@ static const int syscall_allowlist[] = {
     SCMP_SYS(statx),
     SCMP_SYS(open),
     SCMP_SYS(openat),
+    SCMP_SYS(open_by_handle_at),
     SCMP_SYS(ppoll),
     SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
     SCMP_SYS(preadv),
-- 
2.31.1



WARNING: multiple messages have this Message-ID (diff)
From: Max Reitz <mreitz@redhat.com>
To: qemu-devel@nongnu.org, virtio-fs@redhat.com
Cc: Vivek Goyal <vgoyal@redhat.com>, Max Reitz <mreitz@redhat.com>
Subject: [Virtio-fs] [PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle
Date: Fri, 30 Jul 2021 17:01:31 +0200	[thread overview]
Message-ID: <20210730150134.216126-8-mreitz@redhat.com> (raw)
In-Reply-To: <20210730150134.216126-1-mreitz@redhat.com>

This new field is an alternative to lo_inode.fd: one of the two must
always be set.  When an O_PATH FD is needed for some lo_inode, it is
either taken from lo_inode.fd, if valid, or a temporary FD is opened
with open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c      | 116 ++++++++++++++++++++++----
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 2 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 292b7f7e27..487448d666 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@ struct lo_key {
     uint64_t mnt_id;
 };
 
+struct lo_fhandle {
+    union {
+        struct file_handle handle;
+        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+    };
+    int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 struct lo_inode {
+    /*
+     * Either of fd or fhandle must be set (i.e. >= 0 or non-NULL,
+     * respectively).
+     */
     int fd;
+    struct lo_fhandle *fhandle;
 
     /*
      * Atomic reference count for this object.  The nlookup field holds a
@@ -302,6 +319,44 @@ static int temp_fd_steal(TempFd *temp_fd)
     }
 }
 
+/**
+ * Open the given file handle with the given flags.
+ *
+ * The mount FD to pass to open_by_handle_at() is taken from the
+ * mount_fds hash map.
+ *
+ * On error, return -errno.
+ */
+static int open_file_handle(const struct lo_fhandle *fh, int flags)
+{
+    gpointer mount_fd_ptr;
+    int mount_fd;
+    bool found;
+    int ret;
+
+    ret = pthread_rwlock_rdlock(&mount_fds_lock);
+    if (ret) {
+        return -ret;
+    }
+
+    /* mount_fd == 0 is valid, so we need lookup_extended */
+    found = g_hash_table_lookup_extended(mount_fds,
+                                         GINT_TO_POINTER(fh->mount_id),
+                                         NULL, &mount_fd_ptr);
+    pthread_rwlock_unlock(&mount_fds_lock);
+    if (!found) {
+        return -EINVAL;
+    }
+    mount_fd = GPOINTER_TO_INT(mount_fd_ptr);
+
+    ret = open_by_handle_at(mount_fd, (struct file_handle *)&fh->handle, flags);
+    if (ret < 0) {
+        return -errno;
+    }
+
+    return ret;
+}
+
 /*
  * Load capng's state from our saved state if the current thread
  * hadn't previously been loaded.
@@ -608,7 +663,11 @@ static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep)
     *inodep = NULL;
 
     if (g_atomic_int_dec_and_test(&inode->refcount)) {
-        close(inode->fd);
+        if (inode->fd >= 0) {
+            close(inode->fd);
+        } else {
+            g_free(inode->fhandle);
+        }
         free(inode);
     }
 }
@@ -635,10 +694,25 @@ static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
 
 static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
 {
-    *tfd = (TempFd) {
-        .fd = inode->fd,
-        .owned = false,
-    };
+    if (inode->fd >= 0) {
+        *tfd = (TempFd) {
+            .fd = inode->fd,
+            .owned = false,
+        };
+    } else {
+        int fd;
+
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, O_PATH);
+        if (fd < 0) {
+            return -errno;
+        }
+
+        *tfd = (TempFd) {
+            .fd = fd,
+            .owned = true,
+        };
+    }
 
     return 0;
 }
@@ -678,22 +752,32 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd *tfd)
 static int lo_inode_open(const struct lo_data *lo, const struct lo_inode *inode,
                          int open_flags, TempFd *tfd)
 {
-    g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+    g_autofree char *fd_str = NULL;
     int fd;
 
     if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
         return -EBADF;
     }
 
-    /*
-     * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
-     * that the inode is not a special file but if an external process races
-     * with us then symlinks are traversed here. It is not possible to escape
-     * the shared directory since it is mounted as "/" though.
-     */
-    fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-    if (fd < 0) {
-        return -errno;
+    if (inode->fd >= 0) {
+        /*
+         * The file is a symlink so O_NOFOLLOW must be ignored. We checked
+         * earlier that the inode is not a special file but if an external
+         * process races with us then symlinks are traversed here. It is not
+         * possible to escape the shared directory since it is mounted as "/"
+         * though.
+         */
+        fd_str = g_strdup_printf("%d", inode->fd);
+        fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
+        if (fd < 0) {
+            return -errno;
+        }
+    } else {
+        assert(inode->fhandle != NULL);
+        fd = open_file_handle(inode->fhandle, open_flags);
+        if (fd < 0) {
+            return fd;
+        }
     }
 
     *tfd = (TempFd) {
@@ -4110,6 +4194,8 @@ int main(int argc, char *argv[])
     lo.root.fuse_ino = FUSE_ROOT_ID;
     lo.cache = CACHE_AUTO;
 
+    mount_fds = g_hash_table_new(NULL, NULL);
+
     /*
      * Set up the ino map like this:
      * [0] Reserved (will not be used)
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index f49ed94b5e..af04c638cb 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -77,6 +77,7 @@ static const int syscall_allowlist[] = {
     SCMP_SYS(statx),
     SCMP_SYS(open),
     SCMP_SYS(openat),
+    SCMP_SYS(open_by_handle_at),
     SCMP_SYS(ppoll),
     SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */
     SCMP_SYS(preadv),
-- 
2.31.1


  parent reply	other threads:[~2021-07-30 15:07 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-30 15:01 [PATCH v3 00/10] virtiofsd: Allow using file handles instead of O_PATH FDs Max Reitz
2021-07-30 15:01 ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` [PATCH v3 01/10] virtiofsd: Limit setxattr()'s creds-dropped region Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-06 14:16   ` Vivek Goyal
2021-08-06 14:16     ` [Virtio-fs] " Vivek Goyal
2021-08-09 10:30     ` Max Reitz
2021-08-09 10:30       ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` [PATCH v3 02/10] virtiofsd: Add TempFd structure Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-06 14:41   ` Vivek Goyal
2021-08-06 14:41     ` [Virtio-fs] " Vivek Goyal
2021-08-09 10:44     ` Max Reitz
2021-08-09 10:44       ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` [PATCH v3 03/10] virtiofsd: Use lo_inode_open() instead of openat() Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-06 15:42   ` Vivek Goyal
2021-08-06 15:42     ` [Virtio-fs] " Vivek Goyal
2021-07-30 15:01 ` [PATCH v3 04/10] virtiofsd: Add lo_inode_fd() helper Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-06 18:25   ` Vivek Goyal
2021-08-06 18:25     ` [Virtio-fs] " Vivek Goyal
2021-08-09 10:48     ` Max Reitz
2021-08-09 10:48       ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` [PATCH v3 05/10] virtiofsd: Let lo_fd() return a TempFd Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` [PATCH v3 06/10] virtiofsd: Let lo_inode_open() " Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-06 19:55   ` Vivek Goyal
2021-08-06 19:55     ` [Virtio-fs] " Vivek Goyal
2021-08-09 13:40     ` Max Reitz
2021-08-09 13:40       ` [Virtio-fs] " Max Reitz
2021-07-30 15:01 ` Max Reitz [this message]
2021-07-30 15:01   ` [Virtio-fs] [PATCH v3 07/10] virtiofsd: Add lo_inode.fhandle Max Reitz
2021-08-09 15:21   ` Vivek Goyal
2021-08-09 15:21     ` [Virtio-fs] " Vivek Goyal
2021-08-09 16:41     ` Hanna Reitz
2021-08-09 16:41       ` [Virtio-fs] " Hanna Reitz
2021-07-30 15:01 ` [PATCH v3 08/10] virtiofsd: Add inodes_by_handle hash table Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-09 16:10   ` Vivek Goyal
2021-08-09 16:10     ` [Virtio-fs] " Vivek Goyal
2021-08-09 16:47     ` Hanna Reitz
2021-08-09 16:47       ` [Virtio-fs] " Hanna Reitz
2021-08-10 14:07       ` Vivek Goyal
2021-08-10 14:07         ` [Virtio-fs] " Vivek Goyal
2021-08-10 14:13         ` Hanna Reitz
2021-08-10 14:13           ` [Virtio-fs] " Hanna Reitz
2021-08-10 17:51           ` Vivek Goyal
2021-08-10 17:51             ` [Virtio-fs] " Vivek Goyal
2021-07-30 15:01 ` [PATCH v3 09/10] virtiofsd: Optionally fill lo_inode.fhandle Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-09 18:41   ` Vivek Goyal
2021-08-09 18:41     ` [Virtio-fs] " Vivek Goyal
2021-08-10  8:32     ` Hanna Reitz
2021-08-10  8:32       ` [Virtio-fs] " Hanna Reitz
2021-08-10 15:23       ` Vivek Goyal
2021-08-10 15:23         ` [Virtio-fs] " Vivek Goyal
2021-08-10 15:26         ` Hanna Reitz
2021-08-10 15:26           ` [Virtio-fs] " Hanna Reitz
2021-08-10 15:57           ` Vivek Goyal
2021-08-10 15:57             ` [Virtio-fs] " Vivek Goyal
2021-08-11  6:41             ` Hanna Reitz
2021-08-11  6:41               ` [Virtio-fs] " Hanna Reitz
2021-08-16 19:44               ` Vivek Goyal
2021-08-16 19:44                 ` [Virtio-fs] " Vivek Goyal
2021-08-17  8:27                 ` Hanna Reitz
2021-08-17  8:27                   ` [Virtio-fs] " Hanna Reitz
2021-08-17 19:45                   ` Vivek Goyal
2021-08-17 19:45                     ` [Virtio-fs] " Vivek Goyal
2021-08-18  0:14                     ` Vivek Goyal
2021-08-18  0:14                       ` [Virtio-fs] " Vivek Goyal
2021-08-18 13:32                       ` Vivek Goyal
2021-08-18 13:32                         ` [Virtio-fs] " Vivek Goyal
2021-08-18 13:48                         ` Hanna Reitz
2021-08-18 13:48                           ` [Virtio-fs] " Hanna Reitz
2021-08-19 16:38   ` Dr. David Alan Gilbert
2021-08-19 16:38     ` [Virtio-fs] " Dr. David Alan Gilbert
2021-07-30 15:01 ` [PATCH v3 10/10] virtiofsd: Add lazy lo_do_find() Max Reitz
2021-07-30 15:01   ` [Virtio-fs] " Max Reitz
2021-08-09 19:08   ` Vivek Goyal
2021-08-09 19:08     ` [Virtio-fs] " Vivek Goyal
2021-08-10  8:38     ` Hanna Reitz
2021-08-10  8:38       ` [Virtio-fs] " Hanna Reitz
2021-08-10 14:12       ` Vivek Goyal
2021-08-10 14:12         ` [Virtio-fs] " Vivek Goyal
2021-08-10 14:17         ` Hanna Reitz
2021-08-10 14:17           ` [Virtio-fs] " Hanna Reitz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210730150134.216126-8-mreitz@redhat.com \
    --to=mreitz@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    --cc=vgoyal@redhat.com \
    --cc=virtio-fs@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.