All of lore.kernel.org
 help / color / mirror / Atom feed
* [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX
@ 2021-10-11  3:09 Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

The corresponding kernel patch set:
https://lore.kernel.org/all/20211011030052.98923-1-jefflexu@linux.alibaba.com/T/#t

changes since v5:
- add back support for .ioctl()
- add back negotiation during FUSE_INIT
- add '-o dax=[server|attr]' option to control the policy used by
  virtiofsd to determine whether DAX shall be enabled or not for
  specific file. Please refer to the commit log of patch 4/5/6 for
  more detailed information.


changes since v4:
- decide whether DAX shall be enabled or not solely depending on file
  size (DAX is disabled for files smaller than 32KB)
- negotiation during FUSE_INIT is droped
- drop support for .ioctl() for passthrough

changes since v2/v3:
Patch 4 in v2 is incomplete by mistake and it will fail to be compiled.
I had ever sent a seperate patch 4 of v3. Now I send the whole complete
set in v4. Except for this, there's no other diferrence.



Jeffle Xu (6):
  virtiofsd: add .ioctl() support
  virtiofsd: support per-file DAX in fuse protocol
  virtiofsd: negotiate per-file DAX in FUSE_INIT
  virtiofsd: add 'dax=' option
  virtiofsd: implement file size based dax policy
  virtiofsd: implement persistent inode attribute based dax policy

 include/standard-headers/linux/fuse.h |   2 +
 tools/virtiofsd/fuse_common.h         |   5 ++
 tools/virtiofsd/fuse_lowlevel.c       |   3 +
 tools/virtiofsd/helper.c              |   7 ++
 tools/virtiofsd/passthrough_ll.c      | 125 ++++++++++++++++++++++++++
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 6 files changed, 143 insertions(+)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-15  2:49   ` [Virtio-fs] [virtiofsd PATCH v7 " Jeffle Xu
  2021-10-20 17:36   ` [Virtio-fs] [virtiofsd PATCH v6 " Vivek Goyal
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 2/6] virtiofsd: support per-file DAX in fuse protocol Jeffle Xu
                   ` (4 subsequent siblings)
  5 siblings, 2 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

For passthrough, it passes corresponding ioctls to host directly.

Currently only these ioctls that handling persistent inode flags, i.e.,
FS_IOC_[G|S]ETFLAGS and FS_IOC_FS[G|S]ETXATTR are supported for security
concern, though it's implemented in a quite general way, so that we can
expand the scope of allowed ioctls if it is really needed later.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/passthrough_ll.c      | 59 +++++++++++++++++++++++++++
 tools/virtiofsd/passthrough_seccomp.c |  1 +
 2 files changed, 60 insertions(+)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b76d878509..5969d67810 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -47,6 +47,7 @@
 #include <dirent.h>
 #include <pthread.h>
 #include <sys/file.h>
+#include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/resource.h>
@@ -54,6 +55,7 @@
 #include <sys/wait.h>
 #include <sys/xattr.h>
 #include <syslog.h>
+#include <linux/fs.h>
 
 #include "qemu/cutils.h"
 #include "passthrough_helpers.h"
@@ -2105,6 +2107,62 @@ out:
     fuse_reply_err(req, saverr);
 }
 
+
+static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
+                          struct fuse_file_info *fi, unsigned flags, const void *in_buf,
+                          size_t in_bufsz, size_t out_bufsz)
+{
+    int fd = lo_fi_fd(req, fi);
+    int res;
+    int saverr = ENOSYS;
+    int dir;
+    void *buf = NULL;
+    size_t size = 0;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, "
+            "in_bufsz = %lu, out_bufsz = %lu)\n",
+            ino, cmd, flags, in_bufsz, out_bufsz);
+
+    if (cmd != FS_IOC_SETFLAGS || cmd != FS_IOC_GETFLAGS ||
+        cmd != FS_IOC_FSSETXATTR || cmd != FS_IOC_FSGETXATTR)
+        goto out;
+
+    /* unrestricted ioctl is not supported yet */
+    if (flags & FUSE_IOCTL_UNRESTRICTED)
+        goto out;
+
+    dir = _IOC_DIR(cmd);
+
+    if (dir & _IOC_READ) {
+        size = out_bufsz;
+        buf = malloc(size);
+        if (!buf)
+            goto out_err;
+
+        if (dir & _IOC_WRITE)
+            memcpy(buf, in_buf, size);
+
+        res = ioctl(fd, cmd, buf);
+    }
+    else if (dir & _IOC_WRITE) {
+        res = ioctl(fd, cmd, in_buf);
+    }
+    else {
+        res = ioctl(fd, cmd, arg);
+    }
+
+    if (res < 0)
+        goto out_err;
+
+    fuse_reply_ioctl(req, 0, buf, size);
+    return;
+
+out_err:
+    saverr = errno;
+out:
+    fuse_reply_err(req, saverr);
+}
+
 static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
                         struct fuse_file_info *fi)
 {
@@ -3278,6 +3336,7 @@ static struct fuse_lowlevel_ops lo_oper = {
     .fsyncdir = lo_fsyncdir,
     .create = lo_create,
     .getlk = lo_getlk,
+    .ioctl = lo_ioctl,
     .setlk = lo_setlk,
     .open = lo_open,
     .release = lo_release,
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index 62441cfcdb..2a5f7614fc 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -62,6 +62,7 @@ static const int syscall_allowlist[] = {
     SCMP_SYS(gettid),
     SCMP_SYS(gettimeofday),
     SCMP_SYS(getxattr),
+    SCMP_SYS(ioctl),
     SCMP_SYS(linkat),
     SCMP_SYS(listxattr),
     SCMP_SYS(lseek),
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 2/6] virtiofsd: support per-file DAX in fuse protocol
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 3/6] virtiofsd: negotiate per-file DAX in FUSE_INIT Jeffle Xu
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

... in prep for the following support for per-file DAX feature.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 include/standard-headers/linux/fuse.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index 950d7edb7e..7bd006ffcb 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -356,6 +356,7 @@ struct fuse_file_lock {
 #define FUSE_MAP_ALIGNMENT	(1 << 26)
 #define FUSE_SUBMOUNTS		(1 << 27)
 #define FUSE_HANDLE_KILLPRIV_V2	(1 << 28)
+#define FUSE_PERFILE_DAX	(1 << 30)
 
 /**
  * CUSE INIT request/reply flags
@@ -440,6 +441,7 @@ struct fuse_file_lock {
  * FUSE_ATTR_SUBMOUNT: Object is a submount root
  */
 #define FUSE_ATTR_SUBMOUNT      (1 << 0)
+#define FUSE_ATTR_DAX		(1 << 1)
 
 /**
  * Open flags
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 3/6] virtiofsd: negotiate per-file DAX in FUSE_INIT
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 2/6] virtiofsd: support per-file DAX in fuse protocol Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 4/6] virtiofsd: add 'dax=' option Jeffle Xu
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

Among the FUSE_INIT phase, guest virtiofs will advertise per-file DAX
if it's mounted with "-o dax=inode". Then virtiofsd is aware that
client is in per-file DAX mode, and will construct per-inode DAX
attribute accordingly.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/fuse_common.h   | 5 +++++
 tools/virtiofsd/fuse_lowlevel.c | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h
index 8a75729be9..ee6fc64c23 100644
--- a/tools/virtiofsd/fuse_common.h
+++ b/tools/virtiofsd/fuse_common.h
@@ -372,6 +372,11 @@ struct fuse_file_info {
  */
 #define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
 
+/**
+ * Indicates support for per-file DAX.
+ */
+#define FUSE_CAP_PERFILE_DAX (1 << 29)
+
 /**
  * Ioctl flags
  *
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index bee781982b..dbb0b3d275 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -2066,6 +2066,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
     if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
         se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
     }
+    if (arg->flags & FUSE_PERFILE_DAX) {
+        se->conn.capable |= FUSE_CAP_PERFILE_DAX;
+    }
 #ifdef HAVE_SPLICE
 #ifdef HAVE_VMSPLICE
     se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 4/6] virtiofsd: add 'dax=' option
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
                   ` (2 preceding siblings ...)
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 3/6] virtiofsd: negotiate per-file DAX in FUSE_INIT Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy Jeffle Xu
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute " Jeffle Xu
  5 siblings, 0 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

This option is used to specify the policy of constructing per-inode DAX
attribute when guest virtiofs is mounted with "-o dax=inode".

Currently there are two valid policies, "server" and "attr".

When "dax=server" is specified, virtiofsd could implement its own
heuristic strategy of constructing per-inode DAX attribute, e.g.
depending on the file size.

When "dax=attr" is specified, virtiofsd will construct per-inode DAX
attribute denpending on the persistent inode flags of host files. With
this option, admin could select those files that should be DAX enabled
and mark them with persistent inode flags, or users could mark files as
DAX enabled inside guest.

The default behaviour is "none", that is, DAX is always disabled for all
files when guest virtiofs is mounted with "-o dax=inode".

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/helper.c         |  7 +++++++
 tools/virtiofsd/passthrough_ll.c | 16 ++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 28243b51b2..3c7fecda74 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -183,6 +183,13 @@ void fuse_cmdline_help(void)
            "                               to virtiofsd from guest applications.\n"
            "                               default: no_allow_direct_io\n"
            "    -o announce_submounts      Announce sub-mount points to the guest\n"
+           "    -o dax=<policy>            policies of constructing per-file DAX attribute.\n"
+           "                               could be one of \"server\", \"attr\"\n"
+           "                               - server: fuse daemon's own heuristic policy,\n"
+	   "                                 e.g. depending on file size\n"
+           "                               - attr: depending on persistent inode attribute\n"
+           "                                 of host files set by admin or guest\n"
+           "                               default: none\n"
            );
 }
 
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 5969d67810..b4de86e317 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -137,6 +137,12 @@ enum {
     SANDBOX_CHROOT,
 };
 
+enum {
+    DAX_NONE,
+    DAX_SERVER,
+    DAX_ATTR,
+};
+
 typedef struct xattr_map_entry {
     char *key;
     char *prepend;
@@ -162,6 +168,8 @@ struct lo_data {
     int readdirplus_clear;
     int allow_direct_io;
     int announce_submounts;
+    int user_dax;
+    int dax;
     bool use_statx;
     struct lo_inode root;
     GHashTable *inodes; /* protected by lo->mutex */
@@ -206,6 +214,8 @@ static const struct fuse_opt lo_opts[] = {
     { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
     { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
     { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
+    { "dax=server", offsetof(struct lo_data, user_dax), DAX_SERVER },
+    { "dax=attr", offsetof(struct lo_data, user_dax), DAX_ATTR },
     FUSE_OPT_END
 };
 static bool use_syslog = false;
@@ -704,6 +714,12 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
         conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
         lo->killpriv_v2 = 0;
     }
+
+    if ((conn->capable & FUSE_CAP_PERFILE_DAX)) {
+        lo->dax = lo->user_dax;
+    } else {
+        lo->dax = DAX_NONE;
+    }
 }
 
 static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
                   ` (3 preceding siblings ...)
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 4/6] virtiofsd: add 'dax=' option Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-20 17:54   ` Vivek Goyal
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute " Jeffle Xu
  5 siblings, 1 reply; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

When DAX window is fully utilized and needs to be expanded to avoid the
performance fluctuation triggered by DAX window recaliming, it may not
be wise to allocate DAX window for files with size smaller than some
specific point, considering from the perspective of reducing memory
overhead.

To maintain one DAX window chunk (e.g., 2MB in size), 32KB
(512 * 64 bytes) memory footprint will be consumed for page descriptors
inside guest. Thus it'd better disable DAX for those files smaller than
32KB, to reduce the demand for DAX window and thus avoid the unworthy
memory overhead.

Thus only flag the file with FUSE_ATTR_DAX when the file size is greater
than 32 KB. The guest will enable DAX only for those files flagged with
FUSE_ATTR_DAX, when virtiofs is mounted with '-o dax=inode'.

To be noted that both FUSE_LOOKUP and FUSE_READDIRPLUS are affected, and
will convey FUSE_ATTR_DAX flag to the guest.

This policy will be used when '-o dax=server' option is specified for
virtiofsd.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b4de86e317..a5a5061906 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -54,6 +54,7 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <sys/xattr.h>
+#include <sys/user.h>
 #include <syslog.h>
 #include <linux/fs.h>
 
@@ -61,6 +62,19 @@
 #include "passthrough_helpers.h"
 #include "passthrough_seccomp.h"
 
+/*
+ * One page descriptor (64 bytes in size) needs to be maintained for every page
+ * in the DAX window chunk, i.e., there is certain guest memory overhead when
+ * DAX is enabled. Thus disable DAX for those files smaller than this certain
+ * memory overhead if virtiofs is mounted in per-file DAX mode, in which case
+ * the guest page cache will consume less memory when DAX is disabled.
+ */
+#define FUSE_DAX_SHIFT	21
+#define PAGE_DESC_SHIFT	6
+#define FUSE_PERFILE_DAX_SHIFT \
+    (FUSE_DAX_SHIFT - PAGE_SHIFT + PAGE_DESC_SHIFT)
+#define FUSE_PERFILE_DAX_POINT	(1 << FUSE_PERFILE_DAX_SHIFT)
+
 /* Keep track of inode posix locks for each owner. */
 struct lo_inode_plock {
     uint64_t lock_owner;
@@ -986,6 +1000,20 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
     return 0;
 }
 
+static bool lo_should_enable_dax(struct lo_data *lo,
+                                 struct fuse_entry_param *e)
+{
+    if (lo->dax == DAX_NONE || !S_ISREG(e->attr.st_mode)) {
+        return false;
+    }
+
+    if (lo->dax & DAX_SERVER) {
+        return e->attr.st_size > FUSE_PERFILE_DAX_POINT;
+    }
+
+    return false;
+}
+
 /*
  * Increments nlookup on the inode on success. unref_inode_lolocked() must be
  * called eventually to decrement nlookup again. If inodep is non-NULL, the
@@ -1076,6 +1104,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
     }
     e->ino = inode->fuse_ino;
 
+    if (lo_should_enable_dax(lo, e)) {
+        e->attr_flags |= FUSE_ATTR_DAX;
+    }
+
     /* Transfer ownership of inode pointer to caller or drop it */
     if (inodep) {
         *inodep = inode;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute based dax policy
  2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
                   ` (4 preceding siblings ...)
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy Jeffle Xu
@ 2021-10-11  3:09 ` Jeffle Xu
  2021-10-20 17:55   ` Vivek Goyal
  5 siblings, 1 reply; 14+ messages in thread
From: Jeffle Xu @ 2021-10-11  3:09 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

The per-file DAX feature in ext4/xfs uses the persistent inode
attribute, i.e. FS_DAX_FL/FS_XFLAG_DAX, to indicate if DAX shall be
enable for this file or not when filesystem is mounted in per-file DAX
mode.

To keep compatible with this feature in ext4/xfs, virtiofs also supports
enabling DAX depending on the persistent inode attribute, which may be
set/cleared by users inside guest or admin on host.

This policy is used when '-o dax=attr' option is specified for
virtiofsd.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/passthrough_ll.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index a5a5061906..6f75cd6d92 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1000,7 +1000,7 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
     return 0;
 }
 
-static bool lo_should_enable_dax(struct lo_data *lo,
+static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *inode,
                                  struct fuse_entry_param *e)
 {
     if (lo->dax == DAX_NONE || !S_ISREG(e->attr.st_mode)) {
@@ -1011,6 +1011,24 @@ static bool lo_should_enable_dax(struct lo_data *lo,
         return e->attr.st_size > FUSE_PERFILE_DAX_POINT;
     }
 
+    if (lo->dax & DAX_ATTR) {
+        int res, fd;
+        int ret = false;;
+        unsigned int attr;
+
+        fd = lo_inode_open(lo, inode, O_RDONLY);
+        if (fd == -1) {
+            return false;
+        }
+
+        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
+        if (!res && (attr & FS_DAX_FL))
+            ret = true;
+
+        close(fd);
+        return ret;
+    }
+
     return false;
 }
 
@@ -1104,7 +1122,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
     }
     e->ino = inode->fuse_ino;
 
-    if (lo_should_enable_dax(lo, e)) {
+    if (lo_should_enable_dax(lo, inode, e)) {
         e->attr_flags |= FUSE_ATTR_DAX;
     }
 
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [Virtio-fs] [virtiofsd PATCH v7 1/6] virtiofsd: add .ioctl() support
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
@ 2021-10-15  2:49   ` Jeffle Xu
  2021-10-20 17:36   ` [Virtio-fs] [virtiofsd PATCH v6 " Vivek Goyal
  1 sibling, 0 replies; 14+ messages in thread
From: Jeffle Xu @ 2021-10-15  2:49 UTC (permalink / raw)
  To: vgoyal, stefanha, miklos; +Cc: virtio-fs, joseph.qi

changes since v6:
- correct the logic of limiting ioctls other than FS_IOC_[G|S]ETFLAGS
  and FS_IOC_FS[G|S]ETXATTR
- add missing free() corresponding to malloc()

For passthrough, it passes corresponding ioctls to host directly.

Currently only these ioctls that handling persistent inode flags, i.e.,
FS_IOC_[G|S]ETFLAGS and FS_IOC_FS[G|S]ETXATTR are supported for security
concern, though it's implemented in a quite general way, so that we can
expand the scope of allowed ioctls if it is really needed later.

Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
---
 tools/virtiofsd/passthrough_ll.c      | 61 +++++++++++++++++++++++++++
 tools/virtiofsd/passthrough_seccomp.c |  1 +
 2 files changed, 62 insertions(+)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b76d878509..c1cb54c31a 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -47,6 +47,7 @@
 #include <dirent.h>
 #include <pthread.h>
 #include <sys/file.h>
+#include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/resource.h>
@@ -54,6 +55,7 @@
 #include <sys/wait.h>
 #include <sys/xattr.h>
 #include <syslog.h>
+#include <linux/fs.h>
 
 #include "qemu/cutils.h"
 #include "passthrough_helpers.h"
@@ -2105,6 +2107,64 @@ out:
     fuse_reply_err(req, saverr);
 }
 
+
+static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
+                          struct fuse_file_info *fi, unsigned flags, const void *in_buf,
+                          size_t in_bufsz, size_t out_bufsz)
+{
+    int res, dir;
+    int fd = lo_fi_fd(req, fi);
+    int saverr = ENOSYS;
+    void *buf = NULL;
+    size_t size = 0;
+
+    fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, "
+            "in_bufsz = %lu, out_bufsz = %lu)\n",
+            ino, cmd, flags, in_bufsz, out_bufsz);
+
+    if (!(cmd == FS_IOC_SETFLAGS || cmd == FS_IOC_GETFLAGS ||
+          cmd == FS_IOC_FSSETXATTR || cmd == FS_IOC_FSGETXATTR))
+        goto out;
+
+    /* unrestricted ioctl is not supported yet */
+    if (flags & FUSE_IOCTL_UNRESTRICTED)
+        goto out;
+
+    dir = _IOC_DIR(cmd);
+
+    if (dir & _IOC_READ) {
+        size = out_bufsz;
+        buf = malloc(size);
+        if (!buf)
+            goto out_err;
+
+        if (dir & _IOC_WRITE)
+            memcpy(buf, in_buf, size);
+
+        res = ioctl(fd, cmd, buf);
+    }
+    else if (dir & _IOC_WRITE) {
+        res = ioctl(fd, cmd, in_buf);
+    }
+    else {
+        res = ioctl(fd, cmd, arg);
+    }
+
+    if (res < 0)
+        goto out_err;
+
+    fuse_reply_ioctl(req, 0, buf, size);
+    free(buf);
+
+    return;
+
+out_err:
+    saverr = errno;
+out:
+    fuse_reply_err(req, saverr);
+    free(buf);
+}
+
 static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
                         struct fuse_file_info *fi)
 {
@@ -3278,6 +3338,7 @@ static struct fuse_lowlevel_ops lo_oper = {
     .fsyncdir = lo_fsyncdir,
     .create = lo_create,
     .getlk = lo_getlk,
+    .ioctl = lo_ioctl,
     .setlk = lo_setlk,
     .open = lo_open,
     .release = lo_release,
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index 62441cfcdb..2a5f7614fc 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -62,6 +62,7 @@ static const int syscall_allowlist[] = {
     SCMP_SYS(gettid),
     SCMP_SYS(gettimeofday),
     SCMP_SYS(getxattr),
+    SCMP_SYS(ioctl),
     SCMP_SYS(linkat),
     SCMP_SYS(listxattr),
     SCMP_SYS(lseek),
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
  2021-10-15  2:49   ` [Virtio-fs] [virtiofsd PATCH v7 " Jeffle Xu
@ 2021-10-20 17:36   ` Vivek Goyal
  2021-10-21  2:32     ` JeffleXu
  1 sibling, 1 reply; 14+ messages in thread
From: Vivek Goyal @ 2021-10-20 17:36 UTC (permalink / raw)
  To: Jeffle Xu; +Cc: virtio-fs, joseph.qi, miklos

On Mon, Oct 11, 2021 at 11:09:33AM +0800, Jeffle Xu wrote:
> For passthrough, it passes corresponding ioctls to host directly.
> 
> Currently only these ioctls that handling persistent inode flags, i.e.,
> FS_IOC_[G|S]ETFLAGS and FS_IOC_FS[G|S]ETXATTR are supported for security
> concern, though it's implemented in a quite general way, so that we can
> expand the scope of allowed ioctls if it is really needed later.
> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  tools/virtiofsd/passthrough_ll.c      | 59 +++++++++++++++++++++++++++
>  tools/virtiofsd/passthrough_seccomp.c |  1 +
>  2 files changed, 60 insertions(+)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index b76d878509..5969d67810 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -47,6 +47,7 @@
>  #include <dirent.h>
>  #include <pthread.h>
>  #include <sys/file.h>
> +#include <sys/ioctl.h>
>  #include <sys/mount.h>
>  #include <sys/prctl.h>
>  #include <sys/resource.h>
> @@ -54,6 +55,7 @@
>  #include <sys/wait.h>
>  #include <sys/xattr.h>
>  #include <syslog.h>
> +#include <linux/fs.h>
>  
>  #include "qemu/cutils.h"
>  #include "passthrough_helpers.h"
> @@ -2105,6 +2107,62 @@ out:
>      fuse_reply_err(req, saverr);
>  }
>  
> +
> +static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
> +                          struct fuse_file_info *fi, unsigned flags, const void *in_buf,
> +                          size_t in_bufsz, size_t out_bufsz)
> +{
> +    int fd = lo_fi_fd(req, fi);
> +    int res;
> +    int saverr = ENOSYS;
> +    int dir;
> +    void *buf = NULL;
> +    size_t size = 0;
> +
> +    fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, "
> +            "in_bufsz = %lu, out_bufsz = %lu)\n",
> +            ino, cmd, flags, in_bufsz, out_bufsz);
> +
> +    if (cmd != FS_IOC_SETFLAGS || cmd != FS_IOC_GETFLAGS ||
> +        cmd != FS_IOC_FSSETXATTR || cmd != FS_IOC_FSGETXATTR)
> +        goto out;
> +
> +    /* unrestricted ioctl is not supported yet */
> +    if (flags & FUSE_IOCTL_UNRESTRICTED)
> +        goto out;
> +
> +    dir = _IOC_DIR(cmd);
> +
> +    if (dir & _IOC_READ) {
> +        size = out_bufsz;
> +        buf = malloc(size);
> +        if (!buf)
> +            goto out_err;
> +
> +        if (dir & _IOC_WRITE)
> +            memcpy(buf, in_buf, size);
> +
> +        res = ioctl(fd, cmd, buf);
> +    }
> +    else if (dir & _IOC_WRITE) {

Typically else is on same line as } braces.

} else if (dir & _IOC_WRITE) {

Have you tried running qemu/script/checkpatch.pl on these patches. Does it
flag it?

Vivek

> +        res = ioctl(fd, cmd, in_buf);
> +    }
> +    else {
> +        res = ioctl(fd, cmd, arg);
> +    }
> +
> +    if (res < 0)
> +        goto out_err;
> +
> +    fuse_reply_ioctl(req, 0, buf, size);
> +    return;
> +
> +out_err:
> +    saverr = errno;
> +out:
> +    fuse_reply_err(req, saverr);
> +}
> +
>  static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
>                          struct fuse_file_info *fi)
>  {
> @@ -3278,6 +3336,7 @@ static struct fuse_lowlevel_ops lo_oper = {
>      .fsyncdir = lo_fsyncdir,
>      .create = lo_create,
>      .getlk = lo_getlk,
> +    .ioctl = lo_ioctl,
>      .setlk = lo_setlk,
>      .open = lo_open,
>      .release = lo_release,
> diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
> index 62441cfcdb..2a5f7614fc 100644
> --- a/tools/virtiofsd/passthrough_seccomp.c
> +++ b/tools/virtiofsd/passthrough_seccomp.c
> @@ -62,6 +62,7 @@ static const int syscall_allowlist[] = {
>      SCMP_SYS(gettid),
>      SCMP_SYS(gettimeofday),
>      SCMP_SYS(getxattr),
> +    SCMP_SYS(ioctl),
>      SCMP_SYS(linkat),
>      SCMP_SYS(listxattr),
>      SCMP_SYS(lseek),
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy Jeffle Xu
@ 2021-10-20 17:54   ` Vivek Goyal
  2021-11-01  8:59     ` JeffleXu
  0 siblings, 1 reply; 14+ messages in thread
From: Vivek Goyal @ 2021-10-20 17:54 UTC (permalink / raw)
  To: Jeffle Xu; +Cc: virtio-fs, joseph.qi, miklos

On Mon, Oct 11, 2021 at 11:09:37AM +0800, Jeffle Xu wrote:
> When DAX window is fully utilized and needs to be expanded to avoid the
> performance fluctuation triggered by DAX window recaliming, it may not
> be wise to allocate DAX window for files with size smaller than some
> specific point, considering from the perspective of reducing memory
> overhead.
> 
> To maintain one DAX window chunk (e.g., 2MB in size), 32KB
> (512 * 64 bytes) memory footprint will be consumed for page descriptors
> inside guest. Thus it'd better disable DAX for those files smaller than
> 32KB, to reduce the demand for DAX window and thus avoid the unworthy
> memory overhead.
> 
> Thus only flag the file with FUSE_ATTR_DAX when the file size is greater
> than 32 KB. The guest will enable DAX only for those files flagged with
> FUSE_ATTR_DAX, when virtiofs is mounted with '-o dax=inode'.
> 
> To be noted that both FUSE_LOOKUP and FUSE_READDIRPLUS are affected, and
> will convey FUSE_ATTR_DAX flag to the guest.
> 
> This policy will be used when '-o dax=server' option is specified for
> virtiofsd.

So enabling DAX based on size is just one type of policy. Tomorrow one
could think of enabling DAX on some other attribute. So only enable it
for non-executable files etc. 

We could be more specific and define a policy say "-o dax=filesize". This
will allow defining other policies easily later. 

But problem with this option is that defining more complex policies
will be hard. Say I want to enable DAX on non-executable files which
have size greater than 32K. Being able to combine multiple policies
will be easy if we implement separate options/knobs to control them
and that will allow turning on multiple policies together.

Say, -o dax_size_threshold=1MB -o dax_no_exec_file

So I guess a generic "-o dax=server" is not a bad idea. As of now this
will implement a default size based policy of 32K size. But one can add
options later to tweak this.

Vivek

> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index b4de86e317..a5a5061906 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -54,6 +54,7 @@
>  #include <sys/syscall.h>
>  #include <sys/wait.h>
>  #include <sys/xattr.h>
> +#include <sys/user.h>
>  #include <syslog.h>
>  #include <linux/fs.h>
>  
> @@ -61,6 +62,19 @@
>  #include "passthrough_helpers.h"
>  #include "passthrough_seccomp.h"
>  
> +/*
> + * One page descriptor (64 bytes in size) needs to be maintained for every page
> + * in the DAX window chunk, i.e., there is certain guest memory overhead when
> + * DAX is enabled. Thus disable DAX for those files smaller than this certain
> + * memory overhead if virtiofs is mounted in per-file DAX mode, in which case
> + * the guest page cache will consume less memory when DAX is disabled.
> + */
> +#define FUSE_DAX_SHIFT	21
> +#define PAGE_DESC_SHIFT	6
> +#define FUSE_PERFILE_DAX_SHIFT \
> +    (FUSE_DAX_SHIFT - PAGE_SHIFT + PAGE_DESC_SHIFT)
> +#define FUSE_PERFILE_DAX_POINT	(1 << FUSE_PERFILE_DAX_SHIFT)
> +
>  /* Keep track of inode posix locks for each owner. */
>  struct lo_inode_plock {
>      uint64_t lock_owner;
> @@ -986,6 +1000,20 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
>      return 0;
>  }
>  
> +static bool lo_should_enable_dax(struct lo_data *lo,
> +                                 struct fuse_entry_param *e)
> +{
> +    if (lo->dax == DAX_NONE || !S_ISREG(e->attr.st_mode)) {
> +        return false;
> +    }
> +
> +    if (lo->dax & DAX_SERVER) {
> +        return e->attr.st_size > FUSE_PERFILE_DAX_POINT;
> +    }
> +
> +    return false;
> +}
> +
>  /*
>   * Increments nlookup on the inode on success. unref_inode_lolocked() must be
>   * called eventually to decrement nlookup again. If inodep is non-NULL, the
> @@ -1076,6 +1104,10 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
>      }
>      e->ino = inode->fuse_ino;
>  
> +    if (lo_should_enable_dax(lo, e)) {
> +        e->attr_flags |= FUSE_ATTR_DAX;
> +    }
> +
>      /* Transfer ownership of inode pointer to caller or drop it */
>      if (inodep) {
>          *inodep = inode;
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute based dax policy
  2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute " Jeffle Xu
@ 2021-10-20 17:55   ` Vivek Goyal
  0 siblings, 0 replies; 14+ messages in thread
From: Vivek Goyal @ 2021-10-20 17:55 UTC (permalink / raw)
  To: Jeffle Xu; +Cc: virtio-fs, joseph.qi, miklos

On Mon, Oct 11, 2021 at 11:09:38AM +0800, Jeffle Xu wrote:
> The per-file DAX feature in ext4/xfs uses the persistent inode
> attribute, i.e. FS_DAX_FL/FS_XFLAG_DAX, to indicate if DAX shall be
> enable for this file or not when filesystem is mounted in per-file DAX
> mode.
> 
> To keep compatible with this feature in ext4/xfs, virtiofs also supports
> enabling DAX depending on the persistent inode attribute, which may be
> set/cleared by users inside guest or admin on host.
> 
> This policy is used when '-o dax=attr' option is specified for
> virtiofsd.

I guess keep this patch higher up in the patch series as we want to make
sure this behavior works first. And then implement server specific policy.
Just a minor nit. 

Once you post new patch series with kernel changes, I will spend some time
testing these patches.

Vivek
> 
> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index a5a5061906..6f75cd6d92 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -1000,7 +1000,7 @@ static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
>      return 0;
>  }
>  
> -static bool lo_should_enable_dax(struct lo_data *lo,
> +static bool lo_should_enable_dax(struct lo_data *lo, struct lo_inode *inode,
>                                   struct fuse_entry_param *e)
>  {
>      if (lo->dax == DAX_NONE || !S_ISREG(e->attr.st_mode)) {
> @@ -1011,6 +1011,24 @@ static bool lo_should_enable_dax(struct lo_data *lo,
>          return e->attr.st_size > FUSE_PERFILE_DAX_POINT;
>      }
>  
> +    if (lo->dax & DAX_ATTR) {
> +        int res, fd;
> +        int ret = false;;
> +        unsigned int attr;
> +
> +        fd = lo_inode_open(lo, inode, O_RDONLY);
> +        if (fd == -1) {
> +            return false;
> +        }
> +
> +        res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
> +        if (!res && (attr & FS_DAX_FL))
> +            ret = true;
> +
> +        close(fd);
> +        return ret;
> +    }
> +
>      return false;
>  }
>  
> @@ -1104,7 +1122,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
>      }
>      e->ino = inode->fuse_ino;
>  
> -    if (lo_should_enable_dax(lo, e)) {
> +    if (lo_should_enable_dax(lo, inode, e)) {
>          e->attr_flags |= FUSE_ATTR_DAX;
>      }
>  
> -- 
> 2.27.0
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support
  2021-10-20 17:36   ` [Virtio-fs] [virtiofsd PATCH v6 " Vivek Goyal
@ 2021-10-21  2:32     ` JeffleXu
  0 siblings, 0 replies; 14+ messages in thread
From: JeffleXu @ 2021-10-21  2:32 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs, joseph.qi, miklos



On 10/21/21 1:36 AM, Vivek Goyal wrote:
> On Mon, Oct 11, 2021 at 11:09:33AM +0800, Jeffle Xu wrote:
>> For passthrough, it passes corresponding ioctls to host directly.
>>
>> Currently only these ioctls that handling persistent inode flags, i.e.,
>> FS_IOC_[G|S]ETFLAGS and FS_IOC_FS[G|S]ETXATTR are supported for security
>> concern, though it's implemented in a quite general way, so that we can
>> expand the scope of allowed ioctls if it is really needed later.
>>
>> Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
>> ---
>>  tools/virtiofsd/passthrough_ll.c      | 59 +++++++++++++++++++++++++++
>>  tools/virtiofsd/passthrough_seccomp.c |  1 +
>>  2 files changed, 60 insertions(+)
>>
>> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
>> index b76d878509..5969d67810 100644
>> --- a/tools/virtiofsd/passthrough_ll.c
>> +++ b/tools/virtiofsd/passthrough_ll.c
>> @@ -47,6 +47,7 @@
>>  #include <dirent.h>
>>  #include <pthread.h>
>>  #include <sys/file.h>
>> +#include <sys/ioctl.h>
>>  #include <sys/mount.h>
>>  #include <sys/prctl.h>
>>  #include <sys/resource.h>
>> @@ -54,6 +55,7 @@
>>  #include <sys/wait.h>
>>  #include <sys/xattr.h>
>>  #include <syslog.h>
>> +#include <linux/fs.h>
>>  
>>  #include "qemu/cutils.h"
>>  #include "passthrough_helpers.h"
>> @@ -2105,6 +2107,62 @@ out:
>>      fuse_reply_err(req, saverr);
>>  }
>>  
>> +
>> +static void lo_ioctl(fuse_req_t req, fuse_ino_t ino, unsigned int cmd, void *arg,
>> +                          struct fuse_file_info *fi, unsigned flags, const void *in_buf,
>> +                          size_t in_bufsz, size_t out_bufsz)
>> +{
>> +    int fd = lo_fi_fd(req, fi);
>> +    int res;
>> +    int saverr = ENOSYS;
>> +    int dir;
>> +    void *buf = NULL;
>> +    size_t size = 0;
>> +
>> +    fuse_log(FUSE_LOG_DEBUG, "lo_ioctl(ino=%" PRIu64 ", cmd=0x%x, flags=0x%x, "
>> +            "in_bufsz = %lu, out_bufsz = %lu)\n",
>> +            ino, cmd, flags, in_bufsz, out_bufsz);
>> +
>> +    if (cmd != FS_IOC_SETFLAGS || cmd != FS_IOC_GETFLAGS ||
>> +        cmd != FS_IOC_FSSETXATTR || cmd != FS_IOC_FSGETXATTR)
>> +        goto out;
>> +
>> +    /* unrestricted ioctl is not supported yet */
>> +    if (flags & FUSE_IOCTL_UNRESTRICTED)
>> +        goto out;
>> +
>> +    dir = _IOC_DIR(cmd);
>> +
>> +    if (dir & _IOC_READ) {
>> +        size = out_bufsz;
>> +        buf = malloc(size);
>> +        if (!buf)
>> +            goto out_err;
>> +
>> +        if (dir & _IOC_WRITE)
>> +            memcpy(buf, in_buf, size);
>> +
>> +        res = ioctl(fd, cmd, buf);
>> +    }
>> +    else if (dir & _IOC_WRITE) {
> 
> Typically else is on same line as } braces.
> 
> } else if (dir & _IOC_WRITE) {
> 
> Have you tried running qemu/script/checkpatch.pl on these patches. Does it
> flag it?

Sorry, not yet. I have no experience contributing to qemu before... I
will check it next time and send a new version later.


-- 
Thanks,
Jeffle


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy
  2021-10-20 17:54   ` Vivek Goyal
@ 2021-11-01  8:59     ` JeffleXu
  2021-11-01 13:18       ` Vivek Goyal
  0 siblings, 1 reply; 14+ messages in thread
From: JeffleXu @ 2021-11-01  8:59 UTC (permalink / raw)
  To: Vivek Goyal; +Cc: virtio-fs, joseph.qi, miklos



On 10/21/21 1:54 AM, Vivek Goyal wrote:
> On Mon, Oct 11, 2021 at 11:09:37AM +0800, Jeffle Xu wrote:
>> When DAX window is fully utilized and needs to be expanded to avoid the
>> performance fluctuation triggered by DAX window recaliming, it may not
>> be wise to allocate DAX window for files with size smaller than some
>> specific point, considering from the perspective of reducing memory
>> overhead.
>>
>> To maintain one DAX window chunk (e.g., 2MB in size), 32KB
>> (512 * 64 bytes) memory footprint will be consumed for page descriptors
>> inside guest. Thus it'd better disable DAX for those files smaller than
>> 32KB, to reduce the demand for DAX window and thus avoid the unworthy
>> memory overhead.
>>
>> Thus only flag the file with FUSE_ATTR_DAX when the file size is greater
>> than 32 KB. The guest will enable DAX only for those files flagged with
>> FUSE_ATTR_DAX, when virtiofs is mounted with '-o dax=inode'.
>>
>> To be noted that both FUSE_LOOKUP and FUSE_READDIRPLUS are affected, and
>> will convey FUSE_ATTR_DAX flag to the guest.
>>
>> This policy will be used when '-o dax=server' option is specified for
>> virtiofsd.
> 
> So enabling DAX based on size is just one type of policy. Tomorrow one
> could think of enabling DAX on some other attribute. So only enable it
> for non-executable files etc. 
> 
> We could be more specific and define a policy say "-o dax=filesize". This
> will allow defining other policies easily later. 
> 
> But problem with this option is that defining more complex policies
> will be hard. Say I want to enable DAX on non-executable files which
> have size greater than 32K. Being able to combine multiple policies
> will be easy if we implement separate options/knobs to control them
> and that will allow turning on multiple policies together.
> 
> Say, -o dax_size_threshold=1MB -o dax_no_exec_file
> 
> So I guess a generic "-o dax=server" is not a bad idea. As of now this
> will implement a default size based policy of 32K size. But one can add
> options later to tweak this.
> 

The reason why I originally use the 'dax=server' name is that, I want to
emphasize that it's a heuristic strategy implemented by virtiofsd
itself. But now I realized that the input of virtiofsd is all various
kinds of file attributes, e.g. filesize, persistent inode flags, file
type, etc, and so "dax=attr" name is also a vague name in this degree.

Now I prefer "dax=filesize" and "dax=xflag", which shall be more clear
to users at the first glance. The former allows DAX for those smaller
than 32KB, while the latter queries the persistent DAX inode flag and
allows DAX accordingly.

w.r.t. the complex policy you described, e.g. 'non-executable files
which have size greater than 32K', indeed we may need more option knobs
then, maybe "dax=filesize|xflag" need to be transformed to
"dax_filesize" and "dax_xflag"? But one problem of this multiple option
knob design is that, it may be meaningless to specify "dax_filesize" and
"dax_xflag" at the same time.

In a summary, personally I prefer "dax=filesize" and "dax=xflag" so far,
and we can tweak this, e.g. expand to multiple option knobs, if it's
really needed later.


-- 
Thanks,
Jeffle


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy
  2021-11-01  8:59     ` JeffleXu
@ 2021-11-01 13:18       ` Vivek Goyal
  0 siblings, 0 replies; 14+ messages in thread
From: Vivek Goyal @ 2021-11-01 13:18 UTC (permalink / raw)
  To: JeffleXu; +Cc: virtio-fs, joseph.qi, miklos

On Mon, Nov 01, 2021 at 04:59:20PM +0800, JeffleXu wrote:
> 
> 
> On 10/21/21 1:54 AM, Vivek Goyal wrote:
> > On Mon, Oct 11, 2021 at 11:09:37AM +0800, Jeffle Xu wrote:
> >> When DAX window is fully utilized and needs to be expanded to avoid the
> >> performance fluctuation triggered by DAX window recaliming, it may not
> >> be wise to allocate DAX window for files with size smaller than some
> >> specific point, considering from the perspective of reducing memory
> >> overhead.
> >>
> >> To maintain one DAX window chunk (e.g., 2MB in size), 32KB
> >> (512 * 64 bytes) memory footprint will be consumed for page descriptors
> >> inside guest. Thus it'd better disable DAX for those files smaller than
> >> 32KB, to reduce the demand for DAX window and thus avoid the unworthy
> >> memory overhead.
> >>
> >> Thus only flag the file with FUSE_ATTR_DAX when the file size is greater
> >> than 32 KB. The guest will enable DAX only for those files flagged with
> >> FUSE_ATTR_DAX, when virtiofs is mounted with '-o dax=inode'.
> >>
> >> To be noted that both FUSE_LOOKUP and FUSE_READDIRPLUS are affected, and
> >> will convey FUSE_ATTR_DAX flag to the guest.
> >>
> >> This policy will be used when '-o dax=server' option is specified for
> >> virtiofsd.
> > 
> > So enabling DAX based on size is just one type of policy. Tomorrow one
> > could think of enabling DAX on some other attribute. So only enable it
> > for non-executable files etc. 
> > 
> > We could be more specific and define a policy say "-o dax=filesize". This
> > will allow defining other policies easily later. 
> > 
> > But problem with this option is that defining more complex policies
> > will be hard. Say I want to enable DAX on non-executable files which
> > have size greater than 32K. Being able to combine multiple policies
> > will be easy if we implement separate options/knobs to control them
> > and that will allow turning on multiple policies together.
> > 
> > Say, -o dax_size_threshold=1MB -o dax_no_exec_file
> > 
> > So I guess a generic "-o dax=server" is not a bad idea. As of now this
> > will implement a default size based policy of 32K size. But one can add
> > options later to tweak this.
> > 
> 
> The reason why I originally use the 'dax=server' name is that, I want to
> emphasize that it's a heuristic strategy implemented by virtiofsd
> itself. But now I realized that the input of virtiofsd is all various
> kinds of file attributes, e.g. filesize, persistent inode flags, file
> type, etc, and so "dax=attr" name is also a vague name in this degree.
> 
> Now I prefer "dax=filesize" and "dax=xflag", which shall be more clear
> to users at the first glance. The former allows DAX for those smaller
> than 32KB, while the latter queries the persistent DAX inode flag and
> allows DAX accordingly.
> 
> w.r.t. the complex policy you described, e.g. 'non-executable files
> which have size greater than 32K', indeed we may need more option knobs
> then, maybe "dax=filesize|xflag" need to be transformed to
> "dax_filesize" and "dax_xflag"? But one problem of this multiple option
> knob design is that, it may be meaningless to specify "dax_filesize" and
> "dax_xflag" at the same time.
> 
> In a summary, personally I prefer "dax=filesize" and "dax=xflag" so far,
> and we can tweak this, e.g. expand to multiple option knobs, if it's
> really needed later.

Agreed that more complex policy knobs can come later. Say one wants
to enable dax on every file except file smaller than 32K and on executable
files. Then we could probaby extend syntax to "dax=filesize,no_exec".

I like "dax=filesize". Little concerned about "dax=xflag" as xflag is
pretty generic for all kind of flags. How about "dax=inode" instead. This
is also generic, but atleast matches kernel mount option. Alternatively,
how about "dax=xflag_dax".

May be "dax=inode" will be more intuitive as kernel already has defined
semantics for this.

Thanks
Vivek


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-11-01 13:18 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-11  3:09 [Virtio-fs] [virtiofsd PATCH v6 0/6] virtiofsd: support per-file DAX Jeffle Xu
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 1/6] virtiofsd: add .ioctl() support Jeffle Xu
2021-10-15  2:49   ` [Virtio-fs] [virtiofsd PATCH v7 " Jeffle Xu
2021-10-20 17:36   ` [Virtio-fs] [virtiofsd PATCH v6 " Vivek Goyal
2021-10-21  2:32     ` JeffleXu
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 2/6] virtiofsd: support per-file DAX in fuse protocol Jeffle Xu
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 3/6] virtiofsd: negotiate per-file DAX in FUSE_INIT Jeffle Xu
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 4/6] virtiofsd: add 'dax=' option Jeffle Xu
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 5/6] virtiofsd: implement file size based dax policy Jeffle Xu
2021-10-20 17:54   ` Vivek Goyal
2021-11-01  8:59     ` JeffleXu
2021-11-01 13:18       ` Vivek Goyal
2021-10-11  3:09 ` [Virtio-fs] [virtiofsd PATCH v6 6/6] virtiofsd: implement persistent inode attribute " Jeffle Xu
2021-10-20 17:55   ` Vivek Goyal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.