* [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-11 16:34 [PATCH V8 0/3] fuse: Add support for passthrough read/write Alessio Balsini
@ 2020-09-11 16:34 ` Alessio Balsini
2020-09-12 11:06 ` Amir Goldstein
2020-09-11 16:34 ` [PATCH V8 2/3] fuse: Introduce synchronous read and write " Alessio Balsini
` (2 subsequent siblings)
3 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-11 16:34 UTC (permalink / raw)
To: Miklos Szeredi
Cc: Akilesh Kailash, Amir Goldstein, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
Introduce the new FUSE passthrough ioctl(), which allows userspace to
specify a direct connection between a FUSE file and a lower file system
file.
Such ioctl() requires userspace to specify:
- the file descriptor of one of its opened files,
- the unique identifier of the FUSE request associated with a pending
open/create operation,
both encapsulated into a fuse_passthrough_out data structure.
The ioctl() will search for the pending FUSE request matching the unique
identifier, and update the passthrough file pointer of the request with the
file pointer referenced by the passed file descriptor.
When that pending FUSE request is handled, the passthrough file pointer
is copied to the fuse_file data structure, so that the link between FUSE
and lower file system is consolidated.
In order for the passthrough mode to be successfully activated, the lower
file system file must implement both read_ and write_iter file operations.
This extra check avoids special pseudofiles to be targets for this feature.
An additional enforced limitation is that when FUSE passthrough is enabled,
no further file system stacking is allowed.
Signed-off-by: Alessio Balsini <balsini@android.com>
---
fs/fuse/Makefile | 1 +
fs/fuse/dev.c | 57 +++++++++++++++++++++++++++++++++++----
fs/fuse/dir.c | 2 ++
fs/fuse/file.c | 17 +++++++++---
fs/fuse/fuse_i.h | 14 ++++++++++
fs/fuse/inode.c | 9 ++++++-
fs/fuse/passthrough.c | 55 +++++++++++++++++++++++++++++++++++++
include/uapi/linux/fuse.h | 12 ++++++++-
8 files changed, 156 insertions(+), 11 deletions(-)
create mode 100644 fs/fuse/passthrough.c
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 3e8cebfb59b7..6971454a2bdf 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -8,4 +8,5 @@ obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-objs += passthrough.o
virtiofs-y += virtio_fs.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36b3676..b0fbdbfd4fbd 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2219,21 +2219,53 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
return 0;
}
+int fuse_passthrough_open(struct fuse_dev *fud,
+ struct fuse_passthrough_out *pto)
+{
+ int ret;
+ struct fuse_req *req;
+ struct fuse_pqueue *fpq = &fud->pq;
+ struct fuse_conn *fc = fud->fc;
+
+ if (!fc->passthrough)
+ return -EPERM;
+
+ /* This field is reserved for future use */
+ if (pto->len != 0)
+ return -EINVAL;
+
+ spin_lock(&fpq->lock);
+ req = request_find(fpq, pto->unique & ~FUSE_INT_REQ_BIT);
+ if (!req) {
+ spin_unlock(&fpq->lock);
+ return -ENOENT;
+ }
+ __fuse_get_request(req);
+ spin_unlock(&fpq->lock);
+
+ ret = fuse_passthrough_setup(req, pto->fd);
+
+ __fuse_put_request(req);
+ return ret;
+}
+
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- int err = -ENOTTY;
-
- if (cmd == FUSE_DEV_IOC_CLONE) {
- int oldfd;
+ int err;
+ int oldfd;
+ struct fuse_dev *fud;
+ struct fuse_passthrough_out pto;
+ switch (cmd) {
+ case FUSE_DEV_IOC_CLONE:
err = -EFAULT;
if (!get_user(oldfd, (__u32 __user *) arg)) {
struct file *old = fget(oldfd);
err = -EINVAL;
if (old) {
- struct fuse_dev *fud = NULL;
+ fud = NULL;
/*
* Check against file->f_op because CUSE
@@ -2251,6 +2283,21 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
fput(old);
}
}
+ break;
+ case FUSE_DEV_IOC_PASSTHROUGH_OPEN:
+ err = -EFAULT;
+ if (!copy_from_user(&pto,
+ (struct fuse_passthrough_out __user *)arg,
+ sizeof(pto))) {
+ err = -EINVAL;
+ fud = fuse_get_dev(file);
+ if (fud)
+ err = fuse_passthrough_open(fud, &pto);
+ }
+ break;
+ default:
+ err = -ENOTTY;
+ break;
}
return err;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 26f028bc760b..531de0c5c9e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -477,6 +477,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
args.out_args[0].value = &outentry;
args.out_args[1].size = sizeof(outopen);
args.out_args[1].value = &outopen;
+ args.passthrough_filp = NULL;
err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -489,6 +490,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
+ ff->passthrough_filp = args.passthrough_filp;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 83d917f7e542..6c0ec742ce74 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -33,10 +33,12 @@ static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
}
static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
- int opcode, struct fuse_open_out *outargp)
+ int opcode, struct fuse_open_out *outargp,
+ struct file **passthrough_filp)
{
struct fuse_open_in inarg;
FUSE_ARGS(args);
+ int ret;
memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
@@ -51,7 +53,10 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
args.out_args[0].size = sizeof(*outargp);
args.out_args[0].value = outargp;
- return fuse_simple_request(fc, &args);
+ ret = fuse_simple_request(fc, &args);
+ *passthrough_filp = args.passthrough_filp;
+
+ return ret;
}
struct fuse_release_args {
@@ -144,14 +149,16 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
/* Default for no-open */
ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
if (isdir ? !fc->no_opendir : !fc->no_open) {
+ struct file *passthrough_filp;
struct fuse_open_out outarg;
int err;
- err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg,
+ &passthrough_filp);
if (!err) {
ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags;
-
+ ff->passthrough_filp = passthrough_filp;
} else if (err != -ENOSYS) {
fuse_file_free(ff);
return err;
@@ -281,6 +288,8 @@ void fuse_release_common(struct file *file, bool isdir)
struct fuse_release_args *ra = ff->release_args;
int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+ fuse_passthrough_release(ff);
+
fuse_prepare_release(fi, ff, file->f_flags, opcode);
if (ff->flock) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 740a8a7d7ae6..6c5166447905 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -208,6 +208,12 @@ struct fuse_file {
} readdir;
+ /**
+ * Reference to lower filesystem file for read/write operations
+ * handled in pass-through mode
+ */
+ struct file *passthrough_filp;
+
/** RB node to be linked on fuse_conn->polled_files */
struct rb_node polled_node;
@@ -250,6 +256,8 @@ struct fuse_args {
bool page_zeroing:1;
bool page_replace:1;
bool may_block:1;
+ /** Lower filesystem file pointer used in pass-through mode */
+ struct file *passthrough_filp;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
@@ -720,6 +728,9 @@ struct fuse_conn {
/* Do not show mount options */
unsigned int no_mount_options:1;
+ /** Pass-through mode for read/write IO */
+ unsigned int passthrough:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -1093,4 +1104,7 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
u64 fuse_get_unique(struct fuse_iqueue *fiq);
void fuse_free_conn(struct fuse_conn *fc);
+int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd);
+void fuse_passthrough_release(struct fuse_file *ff);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index bba747520e9b..eb223130a917 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -965,6 +965,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
min_t(unsigned int, FUSE_MAX_MAX_PAGES,
max_t(unsigned int, arg->max_pages, 1));
}
+ if (arg->flags & FUSE_PASSTHROUGH) {
+ fc->passthrough = 1;
+ /* Prevent further stacking */
+ fc->sb->s_stack_depth =
+ FILESYSTEM_MAX_STACK_DEPTH;
+ }
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -1002,7 +1008,8 @@ void fuse_send_init(struct fuse_conn *fc)
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
- FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+ FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
+ FUSE_PASSTHROUGH;
ia->args.opcode = FUSE_INIT;
ia->args.in_numargs = 1;
ia->args.in_args[0].size = sizeof(ia->in);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644
index 000000000000..86ab4eafa7bf
--- /dev/null
+++ b/fs/fuse/passthrough.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "fuse_i.h"
+
+int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd)
+{
+ int ret;
+ int fs_stack_depth;
+ struct file *passthrough_filp;
+ struct inode *passthrough_inode;
+ struct super_block *passthrough_sb;
+
+ /* Passthrough mode can only be enabled at file open/create time */
+ if (req->in.h.opcode != FUSE_OPEN && req->in.h.opcode != FUSE_CREATE) {
+ pr_err("FUSE: invalid OPCODE for request.\n");
+ return -EINVAL;
+ }
+
+ passthrough_filp = fget(fd);
+ if (!passthrough_filp) {
+ pr_err("FUSE: invalid file descriptor for passthrough.\n");
+ return -EINVAL;
+ }
+
+ ret = -EINVAL;
+ if (!passthrough_filp->f_op->read_iter ||
+ !passthrough_filp->f_op->write_iter) {
+ pr_err("FUSE: passthrough file misses file operations.\n");
+ goto out;
+ }
+
+ passthrough_inode = file_inode(passthrough_filp);
+ passthrough_sb = passthrough_inode->i_sb;
+ fs_stack_depth = passthrough_sb->s_stack_depth + 1;
+ ret = -EEXIST;
+ if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+ pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
+ goto out;
+ }
+
+ req->args->passthrough_filp = passthrough_filp;
+ return 0;
+out:
+ fput(passthrough_filp);
+ return ret;
+}
+
+void fuse_passthrough_release(struct fuse_file *ff)
+{
+ if (!ff->passthrough_filp)
+ return;
+
+ fput(ff->passthrough_filp);
+ ff->passthrough_filp = NULL;
+}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89815..0cd9fd83374a 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -342,6 +342,7 @@ struct fuse_file_lock {
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
+#define FUSE_PASSTHROUGH (1 << 27)
/**
* CUSE INIT request/reply flags
@@ -794,6 +795,14 @@ struct fuse_in_header {
uint32_t padding;
};
+struct fuse_passthrough_out {
+ uint64_t unique;
+ uint32_t fd;
+ /* For future implementation */
+ uint32_t len;
+ void *vec;
+};
+
struct fuse_out_header {
uint32_t len;
int32_t error;
@@ -869,7 +878,8 @@ struct fuse_notify_retrieve_in {
};
/* Device ioctls: */
-#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_PASSTHROUGH_OPEN _IOW(229, 1, struct fuse_passthrough_out)
struct fuse_lseek_in {
uint64_t fh;
--
2.28.0.618.gf4bc123cb7-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-11 16:34 ` [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough Alessio Balsini
@ 2020-09-12 11:06 ` Amir Goldstein
2020-09-18 16:33 ` Alessio Balsini
0 siblings, 1 reply; 17+ messages in thread
From: Amir Goldstein @ 2020-09-12 11:06 UTC (permalink / raw)
To: Alessio Balsini
Cc: Miklos Szeredi, Akilesh Kailash, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
>
> Introduce the new FUSE passthrough ioctl(), which allows userspace to
> specify a direct connection between a FUSE file and a lower file system
> file.
> Such ioctl() requires userspace to specify:
> - the file descriptor of one of its opened files,
> - the unique identifier of the FUSE request associated with a pending
> open/create operation,
> both encapsulated into a fuse_passthrough_out data structure.
> The ioctl() will search for the pending FUSE request matching the unique
> identifier, and update the passthrough file pointer of the request with the
> file pointer referenced by the passed file descriptor.
> When that pending FUSE request is handled, the passthrough file pointer
> is copied to the fuse_file data structure, so that the link between FUSE
> and lower file system is consolidated.
>
> In order for the passthrough mode to be successfully activated, the lower
> file system file must implement both read_ and write_iter file operations.
> This extra check avoids special pseudofiles to be targets for this feature.
> An additional enforced limitation is that when FUSE passthrough is enabled,
> no further file system stacking is allowed.
>
> Signed-off-by: Alessio Balsini <balsini@android.com>
> ---
[...]
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index bba747520e9b..eb223130a917 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -965,6 +965,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
> min_t(unsigned int, FUSE_MAX_MAX_PAGES,
> max_t(unsigned int, arg->max_pages, 1));
> }
> + if (arg->flags & FUSE_PASSTHROUGH) {
> + fc->passthrough = 1;
> + /* Prevent further stacking */
> + fc->sb->s_stack_depth =
> + FILESYSTEM_MAX_STACK_DEPTH;
> + }
That seems a bit limiting.
I suppose what you really want to avoid is loops into FUSE fd.
There may be a way to do this with forbidding overlay over FUSE passthrough
or the other way around.
You can set fc->sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH - 1
here and in passthrough ioctl you can check for looping into a fuse fs with
passthrough enabled on the passed fd (see below) ...
> } else {
> ra_pages = fc->max_read / PAGE_SIZE;
> fc->no_lock = 1;
> @@ -1002,7 +1008,8 @@ void fuse_send_init(struct fuse_conn *fc)
> FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
> FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
> FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
> - FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
> + FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
> + FUSE_PASSTHROUGH;
> ia->args.opcode = FUSE_INIT;
> ia->args.in_numargs = 1;
> ia->args.in_args[0].size = sizeof(ia->in);
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> new file mode 100644
> index 000000000000..86ab4eafa7bf
> --- /dev/null
> +++ b/fs/fuse/passthrough.c
> @@ -0,0 +1,55 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "fuse_i.h"
> +
> +int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd)
> +{
> + int ret;
> + int fs_stack_depth;
> + struct file *passthrough_filp;
> + struct inode *passthrough_inode;
> + struct super_block *passthrough_sb;
> +
> + /* Passthrough mode can only be enabled at file open/create time */
> + if (req->in.h.opcode != FUSE_OPEN && req->in.h.opcode != FUSE_CREATE) {
> + pr_err("FUSE: invalid OPCODE for request.\n");
> + return -EINVAL;
> + }
> +
> + passthrough_filp = fget(fd);
> + if (!passthrough_filp) {
> + pr_err("FUSE: invalid file descriptor for passthrough.\n");
> + return -EINVAL;
> + }
> +
> + ret = -EINVAL;
> + if (!passthrough_filp->f_op->read_iter ||
> + !passthrough_filp->f_op->write_iter) {
> + pr_err("FUSE: passthrough file misses file operations.\n");
> + goto out;
> + }
> +
> + passthrough_inode = file_inode(passthrough_filp);
> + passthrough_sb = passthrough_inode->i_sb;
> + fs_stack_depth = passthrough_sb->s_stack_depth + 1;
... for example:
if (fs_stack_depth && passthrough_sb->s_type == fuse_fs_type) {
pr_err("FUSE: stacked passthrough file\n");
goto out;
}
But maybe we want to ban passthrough to any lower FUSE at least for start.
> + ret = -EEXIST;
Why EEXIST? Why not EINVAL?
> + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
> + pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
> + goto out;
> + }
> +
> + req->args->passthrough_filp = passthrough_filp;
> + return 0;
> +out:
> + fput(passthrough_filp);
> + return ret;
> +}
> +
And speaking of overlayfs, I believe you may be able to test your code with
fuse-overlayfs (passthrough to upper files).
This is a project with real users running real workloads who may be
able to provide you with valuable feedback from testing.
Thanks,
Amir.
[1] https://github.com/containers/fuse-overlayfs
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-12 11:06 ` Amir Goldstein
@ 2020-09-18 16:33 ` Alessio Balsini
2020-09-18 19:59 ` Amir Goldstein
0 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-18 16:33 UTC (permalink / raw)
To: Amir Goldstein
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, David Anderson,
Eric Yan, Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
Hi Amir,
Thanks again for your feedback.
On Sat, Sep 12, 2020 at 02:06:02PM +0300, Amir Goldstein wrote:
> On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
> [...]
> > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> > index bba747520e9b..eb223130a917 100644
> > --- a/fs/fuse/inode.c
> > +++ b/fs/fuse/inode.c
> > @@ -965,6 +965,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
> > min_t(unsigned int, FUSE_MAX_MAX_PAGES,
> > max_t(unsigned int, arg->max_pages, 1));
> > }
> > + if (arg->flags & FUSE_PASSTHROUGH) {
> > + fc->passthrough = 1;
> > + /* Prevent further stacking */
> > + fc->sb->s_stack_depth =
> > + FILESYSTEM_MAX_STACK_DEPTH;
> > + }
>
> That seems a bit limiting.
> I suppose what you really want to avoid is loops into FUSE fd.
> There may be a way to do this with forbidding overlay over FUSE passthrough
> or the other way around.
>
> You can set fc->sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH - 1
> here and in passthrough ioctl you can check for looping into a fuse fs with
> passthrough enabled on the passed fd (see below) ...
>
> [...]
> > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> > new file mode 100644
> > index 000000000000..86ab4eafa7bf
> > --- /dev/null
> > +++ b/fs/fuse/passthrough.c
> > @@ -0,0 +1,55 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include "fuse_i.h"
> > +
> > +int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd)
> > +{
> > + int ret;
> > + int fs_stack_depth;
> > + struct file *passthrough_filp;
> > + struct inode *passthrough_inode;
> > + struct super_block *passthrough_sb;
> > +
> > + /* Passthrough mode can only be enabled at file open/create time */
> > + if (req->in.h.opcode != FUSE_OPEN && req->in.h.opcode != FUSE_CREATE) {
> > + pr_err("FUSE: invalid OPCODE for request.\n");
> > + return -EINVAL;
> > + }
> > +
> > + passthrough_filp = fget(fd);
> > + if (!passthrough_filp) {
> > + pr_err("FUSE: invalid file descriptor for passthrough.\n");
> > + return -EINVAL;
> > + }
> > +
> > + ret = -EINVAL;
> > + if (!passthrough_filp->f_op->read_iter ||
> > + !passthrough_filp->f_op->write_iter) {
> > + pr_err("FUSE: passthrough file misses file operations.\n");
> > + goto out;
> > + }
> > +
> > + passthrough_inode = file_inode(passthrough_filp);
> > + passthrough_sb = passthrough_inode->i_sb;
> > + fs_stack_depth = passthrough_sb->s_stack_depth + 1;
>
> ... for example:
>
> if (fs_stack_depth && passthrough_sb->s_type == fuse_fs_type) {
> pr_err("FUSE: stacked passthrough file\n");
> goto out;
> }
>
> But maybe we want to ban passthrough to any lower FUSE at least for start.
Yes, what I proposed here is very conservative, and your solution sounds
good to me. Unfortunately I don't have a clear idea of what could go wrong
if we relax this constraint. I need some guidance from you experts here.
What do you think if we keep this overly strict rule for now to avoid
unintended behaviors and come back as we find affected use case?
>
> > + ret = -EEXIST;
>
> Why EEXIST? Why not EINVAL?
>
Reaching the stacking limit sounded like an error caused by the undesired
existence of something, thus EEXIST sounded like a good fit.
No problem in changing that to EINVAL.
> > + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
> > + pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
> > + goto out;
> > + }
> > +
> > + req->args->passthrough_filp = passthrough_filp;
> > + return 0;
> > +out:
> > + fput(passthrough_filp);
> > + return ret;
> > +}
> > +
>
> And speaking of overlayfs, I believe you may be able to test your code with
> fuse-overlayfs (passthrough to upper files).
>
> This is a project with real users running real workloads who may be
> able to provide you with valuable feedback from testing.
>
> Thanks,
> Amir.
>
> [1] https://github.com/containers/fuse-overlayfs
This is indeed a project with several common elements to what we are doing
in Android, and that would probably benefit from this change. Will surely
involve them in the discussion.
Thanks for sharing!
Alessio
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-18 16:33 ` Alessio Balsini
@ 2020-09-18 19:59 ` Amir Goldstein
2020-09-22 12:15 ` Alessio Balsini
0 siblings, 1 reply; 17+ messages in thread
From: Amir Goldstein @ 2020-09-18 19:59 UTC (permalink / raw)
To: Alessio Balsini
Cc: Miklos Szeredi, Akilesh Kailash, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Fri, Sep 18, 2020 at 7:33 PM Alessio Balsini <balsini@android.com> wrote:
>
> Hi Amir,
>
> Thanks again for your feedback.
>
> On Sat, Sep 12, 2020 at 02:06:02PM +0300, Amir Goldstein wrote:
> > On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
> > [...]
> > > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> > > index bba747520e9b..eb223130a917 100644
> > > --- a/fs/fuse/inode.c
> > > +++ b/fs/fuse/inode.c
> > > @@ -965,6 +965,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
> > > min_t(unsigned int, FUSE_MAX_MAX_PAGES,
> > > max_t(unsigned int, arg->max_pages, 1));
> > > }
> > > + if (arg->flags & FUSE_PASSTHROUGH) {
> > > + fc->passthrough = 1;
> > > + /* Prevent further stacking */
> > > + fc->sb->s_stack_depth =
> > > + FILESYSTEM_MAX_STACK_DEPTH;
> > > + }
> >
> > That seems a bit limiting.
> > I suppose what you really want to avoid is loops into FUSE fd.
> > There may be a way to do this with forbidding overlay over FUSE passthrough
> > or the other way around.
> >
> > You can set fc->sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH - 1
> > here and in passthrough ioctl you can check for looping into a fuse fs with
> > passthrough enabled on the passed fd (see below) ...
> >
> > [...]
> > > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> > > new file mode 100644
> > > index 000000000000..86ab4eafa7bf
> > > --- /dev/null
> > > +++ b/fs/fuse/passthrough.c
> > > @@ -0,0 +1,55 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +
> > > +#include "fuse_i.h"
> > > +
> > > +int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd)
> > > +{
> > > + int ret;
> > > + int fs_stack_depth;
> > > + struct file *passthrough_filp;
> > > + struct inode *passthrough_inode;
> > > + struct super_block *passthrough_sb;
> > > +
> > > + /* Passthrough mode can only be enabled at file open/create time */
> > > + if (req->in.h.opcode != FUSE_OPEN && req->in.h.opcode != FUSE_CREATE) {
> > > + pr_err("FUSE: invalid OPCODE for request.\n");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + passthrough_filp = fget(fd);
> > > + if (!passthrough_filp) {
> > > + pr_err("FUSE: invalid file descriptor for passthrough.\n");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + ret = -EINVAL;
> > > + if (!passthrough_filp->f_op->read_iter ||
> > > + !passthrough_filp->f_op->write_iter) {
> > > + pr_err("FUSE: passthrough file misses file operations.\n");
> > > + goto out;
> > > + }
> > > +
> > > + passthrough_inode = file_inode(passthrough_filp);
> > > + passthrough_sb = passthrough_inode->i_sb;
> > > + fs_stack_depth = passthrough_sb->s_stack_depth + 1;
> >
> > ... for example:
> >
> > if (fs_stack_depth && passthrough_sb->s_type == fuse_fs_type) {
> > pr_err("FUSE: stacked passthrough file\n");
> > goto out;
> > }
> >
> > But maybe we want to ban passthrough to any lower FUSE at least for start.
>
>
> Yes, what I proposed here is very conservative, and your solution sounds
> good to me. Unfortunately I don't have a clear idea of what could go wrong
> if we relax this constraint. I need some guidance from you experts here.
>
I guess the main concern would be locking order and deadlocks.
With my suggestion I think deadlocks are avoided and I am less sure
but think that lockdep should not have false positives either.
If people do need the 1-level stacking, I can try to think harder
if it is safe and maybe add some more compromise limitations.
> What do you think if we keep this overly strict rule for now to avoid
> unintended behaviors and come back as we find affected use case?
>
I can live with that if other designated users don't mind the limitation.
I happen to be developing a passthrough FUSE fs [1] myself and
I also happen to be using it to pass through to overlayfs.
OTOH, the workloads for my use case are mostly large sequential IO,
and the hardware can handle the few extra syscalls, so the passthrough
fd feature is not urgent for my use case at this point in time.
>
> >
> > > + ret = -EEXIST;
> >
> > Why EEXIST? Why not EINVAL?
> >
>
>
> Reaching the stacking limit sounded like an error caused by the undesired
> existence of something, thus EEXIST sounded like a good fit.
> No problem in changing that to EINVAL.
>
>
>
> > > + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
> > > + pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
> > > + goto out;
> > > + }
> > > +
> > > + req->args->passthrough_filp = passthrough_filp;
> > > + return 0;
> > > +out:
> > > + fput(passthrough_filp);
> > > + return ret;
> > > +}
> > > +
> >
> > And speaking of overlayfs, I believe you may be able to test your code with
> > fuse-overlayfs (passthrough to upper files).
> >
...
>
> This is indeed a project with several common elements to what we are doing
> in Android,
Are you in liberty to share more information about the Android project?
Is it related to Incremental FS [2]?
Thanks,
Amir.
[1] https://github.com/amir73il/libfuse/commits/cachegwfs
[2] https://lore.kernel.org/linux-fsdevel/20190502040331.81196-1-ezemtsov@google.com/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-18 19:59 ` Amir Goldstein
@ 2020-09-22 12:15 ` Alessio Balsini
2020-09-22 16:08 ` Amir Goldstein
0 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-22 12:15 UTC (permalink / raw)
To: Amir Goldstein
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, David Anderson,
Eric Yan, Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Fri, Sep 18, 2020 at 10:59:16PM +0300, Amir Goldstein wrote:
> On Fri, Sep 18, 2020 at 7:33 PM Alessio Balsini <balsini@android.com> wrote:
> [...]
> > > ... for example:
> > >
> > > if (fs_stack_depth && passthrough_sb->s_type == fuse_fs_type) {
> > > pr_err("FUSE: stacked passthrough file\n");
> > > goto out;
> > > }
> > >
> > > But maybe we want to ban passthrough to any lower FUSE at least for start.
> >
> >
> > Yes, what I proposed here is very conservative, and your solution sounds
> > good to me. Unfortunately I don't have a clear idea of what could go wrong
> > if we relax this constraint. I need some guidance from you experts here.
> >
>
> I guess the main concern would be locking order and deadlocks.
> With my suggestion I think deadlocks are avoided and I am less sure
> but think that lockdep should not have false positives either.
>
> If people do need the 1-level stacking, I can try to think harder
> if it is safe and maybe add some more compromise limitations.
>
> > What do you think if we keep this overly strict rule for now to avoid
> > unintended behaviors and come back as we find affected use case?
> >
>
> I can live with that if other designated users don't mind the limitation.
>
> I happen to be developing a passthrough FUSE fs [1] myself and
> I also happen to be using it to pass through to overlayfs.
> OTOH, the workloads for my use case are mostly large sequential IO,
> and the hardware can handle the few extra syscalls, so the passthrough
> fd feature is not urgent for my use case at this point in time.
This is something that only happens if the FUSE daemon opens a connection
wanting FUSE_PASSTHROUGH, so shouldn't affect existing use cases. Or am I
wrong?
If some users find this limitation to be an issue, we can rethink/relax
this policy in the future... Switching to something like the solution you
proposed does not break the current behavior, so we would be able to change
this with minimal effort.
>
>
> >
> > >
> > > > + ret = -EEXIST;
> > >
> > > Why EEXIST? Why not EINVAL?
> > >
> >
> >
> > Reaching the stacking limit sounded like an error caused by the undesired
> > existence of something, thus EEXIST sounded like a good fit.
> > No problem in changing that to EINVAL.
> >
> >
> >
> > > > + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
> > > > + pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
> > > > + goto out;
> > > > + }
> > > > +
> > > > + req->args->passthrough_filp = passthrough_filp;
> > > > + return 0;
> > > > +out:
> > > > + fput(passthrough_filp);
> > > > + return ret;
> > > > +}
> > > > +
> > >
> > > And speaking of overlayfs, I believe you may be able to test your code with
> > > fuse-overlayfs (passthrough to upper files).
> > >
> ...
> >
> > This is indeed a project with several common elements to what we are doing
> > in Android,
>
> Are you in liberty to share more information about the Android project?
> Is it related to Incremental FS [2]?
>
> Thanks,
> Amir.
>
> [1] https://github.com/amir73il/libfuse/commits/cachegwfs
> [2] https://lore.kernel.org/linux-fsdevel/20190502040331.81196-1-ezemtsov@google.com/
Thanks for the pointer to cachegwfs, I'm glad I'm seeing more and more
passthrough file systems in addition to our use case in Android.
I'm not directly involved in the Incremental FS project, but, as far as I
remember, only for the first PoC was actually developed as a FUSE file
system. Because of the overhead introduced by the user space round-trips,
that design was left behind and the whole Incremental FS infrastructure
switched to becoming a kernel module.
In general, the FUSE passthrough patch set proposed in this series wouldn't
be helpful for that use case because, for example, Incremental FS requires
live (de)compression of data, that can only be performed by the FUSE
daemon.
The specific use case I'm trying to improve with this FUSE passthrough
series is instead related to the scoped storage feature that we introduced
in Android 11, that is based on FUSE, and affects those folders that are
shared among all the Apps (e.g., DCIM, Downloads, etc). More details here:
https://developer.android.com/about/versions/11/privacy/storage
With FUSE we now have a flexible way to specify more fine-grained
permissions (e.g., specify if an App is allowed to access files depenind on
their type), create private App folders, maintain legacy paths for old
Apps, manipulate pieces of files at run-time, etc. Forgive me if I forgot
something. :)
These extra operations may slower the file system access comprared to a
native, direct access, but if:
- the file being accessed requires special treatment before being passed to
the requesting App, then further tests will be performed at every
read/write operation (with some optimizations). This overhead is of
course annoying, but is something we are happy to pay because is
beneficial to the user (i.e., improves privacy and security).
- Instead, if at open time a file is recognized as safe to access and does
not require any further enforcement from the FUSE daemon, there's no need
to pay for future read/write operations overheads, that wouldn't do
anything more than just copying data (possibly with the help of
splicing). In this case the FUSE passthrough feature proposed in this
series can be enabled to reduce this overhead.
Moreover, some Apps use big files that contain all their resources, then
access these files at random offsets, not taking advantage of read-ahead
cache. The same happens for files containing databases.
In addition, our specific use case involves a FUSE daemon is probably
heavier than the average passthrough file system (for example those that
are in libfuse/examples), so reducing user space round trips thanks to the
patchset proposed here gives a strong improvement.
Anyway, only showing the improvements in our extreme use case would have
brought a limited case for upstreaming, and that is why the benchmarks I
ran (presented in the cover letter), are based on the vanilla
passthrough_hp daemon managing kind of standard storage workloads, and
still show evident performance improvements.
Running and sharing benchmarks on Android is also tricky because at the
time of first submission the most recent public real device was running a
4.14 kernel, so that would have been maybe nice to see, but not that
interesting... :)
I hope I answered all your questions, please let me know if I missed
something and feel free to ask for more details!
Thanks again,
Alessio
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-22 12:15 ` Alessio Balsini
@ 2020-09-22 16:08 ` Amir Goldstein
2020-09-29 14:30 ` Alessio Balsini
0 siblings, 1 reply; 17+ messages in thread
From: Amir Goldstein @ 2020-09-22 16:08 UTC (permalink / raw)
To: Alessio Balsini
Cc: Miklos Szeredi, Akilesh Kailash, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Tue, Sep 22, 2020 at 3:15 PM Alessio Balsini <balsini@android.com> wrote:
>
> On Fri, Sep 18, 2020 at 10:59:16PM +0300, Amir Goldstein wrote:
> > On Fri, Sep 18, 2020 at 7:33 PM Alessio Balsini <balsini@android.com> wrote:
> > [...]
> > > > ... for example:
> > > >
> > > > if (fs_stack_depth && passthrough_sb->s_type == fuse_fs_type) {
> > > > pr_err("FUSE: stacked passthrough file\n");
> > > > goto out;
> > > > }
> > > >
> > > > But maybe we want to ban passthrough to any lower FUSE at least for start.
> > >
> > >
> > > Yes, what I proposed here is very conservative, and your solution sounds
> > > good to me. Unfortunately I don't have a clear idea of what could go wrong
> > > if we relax this constraint. I need some guidance from you experts here.
> > >
> >
> > I guess the main concern would be locking order and deadlocks.
> > With my suggestion I think deadlocks are avoided and I am less sure
> > but think that lockdep should not have false positives either.
> >
> > If people do need the 1-level stacking, I can try to think harder
> > if it is safe and maybe add some more compromise limitations.
> >
> > > What do you think if we keep this overly strict rule for now to avoid
> > > unintended behaviors and come back as we find affected use case?
> > >
> >
> > I can live with that if other designated users don't mind the limitation.
> >
> > I happen to be developing a passthrough FUSE fs [1] myself and
> > I also happen to be using it to pass through to overlayfs.
> > OTOH, the workloads for my use case are mostly large sequential IO,
> > and the hardware can handle the few extra syscalls, so the passthrough
> > fd feature is not urgent for my use case at this point in time.
>
>
> This is something that only happens if the FUSE daemon opens a connection
> wanting FUSE_PASSTHROUGH, so shouldn't affect existing use cases. Or am I
> wrong?
I meant, if I would have expected a significant performance improvement
from FUSE_PASSTHROUGH in my use case, I would have wanted to use it
and then passthrough to overlayfs would have mattered to me more.
> If some users find this limitation to be an issue, we can rethink/relax
> this policy in the future... Switching to something like the solution you
> proposed does not break the current behavior, so we would be able to change
> this with minimal effort.
>
True.
>
> >
> >
> > >
> > > >
> > > > > + ret = -EEXIST;
> > > >
> > > > Why EEXIST? Why not EINVAL?
> > > >
> > >
> > >
> > > Reaching the stacking limit sounded like an error caused by the undesired
> > > existence of something, thus EEXIST sounded like a good fit.
> > > No problem in changing that to EINVAL.
> > >
> > >
> > >
> > > > > + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
> > > > > + pr_err("FUSE: maximum fs stacking depth exceeded for passthrough\n");
> > > > > + goto out;
> > > > > + }
> > > > > +
> > > > > + req->args->passthrough_filp = passthrough_filp;
> > > > > + return 0;
> > > > > +out:
> > > > > + fput(passthrough_filp);
> > > > > + return ret;
> > > > > +}
> > > > > +
> > > >
> > > > And speaking of overlayfs, I believe you may be able to test your code with
> > > > fuse-overlayfs (passthrough to upper files).
> > > >
> > ...
> > >
> > > This is indeed a project with several common elements to what we are doing
> > > in Android,
> >
> > Are you in liberty to share more information about the Android project?
> > Is it related to Incremental FS [2]?
> >
> > Thanks,
> > Amir.
> >
> > [1] https://github.com/amir73il/libfuse/commits/cachegwfs
> > [2] https://lore.kernel.org/linux-fsdevel/20190502040331.81196-1-ezemtsov@google.com/
>
> Thanks for the pointer to cachegwfs, I'm glad I'm seeing more and more
> passthrough file systems in addition to our use case in Android.
>
I am hearing about a lot of these projects.
I think that FUSE_PASSTHROUGH is a very useful feature.
I have an intention to explore passthrough to directory fd for
directory modifications. I sure hope you will beat me to it ;-)
> I'm not directly involved in the Incremental FS project, but, as far as I
> remember, only for the first PoC was actually developed as a FUSE file
> system. Because of the overhead introduced by the user space round-trips,
> that design was left behind and the whole Incremental FS infrastructure
> switched to becoming a kernel module.
> In general, the FUSE passthrough patch set proposed in this series wouldn't
> be helpful for that use case because, for example, Incremental FS requires
> live (de)compression of data, that can only be performed by the FUSE
> daemon.
>
Ext4 supports inline encryption. Btrfs supports encrypted/compressed extents.
No reason for FUSE not to support the same.
Is it trivial? No.
Is it an excuse for not using FUSE and writing a new userspace fs. Not
in my option.
> The specific use case I'm trying to improve with this FUSE passthrough
> series is instead related to the scoped storage feature that we introduced
> in Android 11, that is based on FUSE, and affects those folders that are
> shared among all the Apps (e.g., DCIM, Downloads, etc). More details here:
>
sdcard fs has had a lot of reincarnations :-)
I for one am happy with the return to FUSE.
Instead of saying "FUSE is too slow" and implementing a kernel sdcardfs,
improve FUSE to be faster for everyone - that's the way to go ;-)
> https://developer.android.com/about/versions/11/privacy/storage
>
> With FUSE we now have a flexible way to specify more fine-grained
> permissions (e.g., specify if an App is allowed to access files depenind on
> their type), create private App folders, maintain legacy paths for old
> Apps, manipulate pieces of files at run-time, etc. Forgive me if I forgot
> something. :)
> These extra operations may slower the file system access comprared to a
> native, direct access, but if:
> - the file being accessed requires special treatment before being passed to
> the requesting App, then further tests will be performed at every
> read/write operation (with some optimizations). This overhead is of
> course annoying, but is something we are happy to pay because is
> beneficial to the user (i.e., improves privacy and security).
> - Instead, if at open time a file is recognized as safe to access and does
> not require any further enforcement from the FUSE daemon, there's no need
> to pay for future read/write operations overheads, that wouldn't do
> anything more than just copying data (possibly with the help of
> splicing). In this case the FUSE passthrough feature proposed in this
> series can be enabled to reduce this overhead.
>
> Moreover, some Apps use big files that contain all their resources, then
> access these files at random offsets, not taking advantage of read-ahead
> cache. The same happens for files containing databases.
> In addition, our specific use case involves a FUSE daemon is probably
> heavier than the average passthrough file system (for example those that
> are in libfuse/examples), so reducing user space round trips thanks to the
> patchset proposed here gives a strong improvement.
>
> Anyway, only showing the improvements in our extreme use case would have
> brought a limited case for upstreaming, and that is why the benchmarks I
> ran (presented in the cover letter), are based on the vanilla
> passthrough_hp daemon managing kind of standard storage workloads, and
> still show evident performance improvements.
> Running and sharing benchmarks on Android is also tricky because at the
> time of first submission the most recent public real device was running a
> 4.14 kernel, so that would have been maybe nice to see, but not that
> interesting... :)
>
> I hope I answered all your questions, please let me know if I missed
> something and feel free to ask for more details!
>
Thanks for sharing the details,
Amir.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough
2020-09-22 16:08 ` Amir Goldstein
@ 2020-09-29 14:30 ` Alessio Balsini
0 siblings, 0 replies; 17+ messages in thread
From: Alessio Balsini @ 2020-09-29 14:30 UTC (permalink / raw)
To: Amir Goldstein
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, David Anderson,
Eric Yan, Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Tue, Sep 22, 2020 at 07:08:48PM +0300, Amir Goldstein wrote:
>
> I am hearing about a lot of these projects.
> I think that FUSE_PASSTHROUGH is a very useful feature.
> I have an intention to explore passthrough to directory fd for
> directory modifications. I sure hope you will beat me to it ;-)
It's awesome that you mentioned this, something similar is already in my
TODO list, suggested by Paul (in CC). I'll start working on this and will
be glad to discuss as soon as this FUSE_PASSTHROUGH extension will
hopefully get accepted.
>
> > I'm not directly involved in the Incremental FS project, but, as far as I
> > remember, only for the first PoC was actually developed as a FUSE file
> > system. Because of the overhead introduced by the user space round-trips,
> > that design was left behind and the whole Incremental FS infrastructure
> > switched to becoming a kernel module.
> > In general, the FUSE passthrough patch set proposed in this series wouldn't
> > be helpful for that use case because, for example, Incremental FS requires
> > live (de)compression of data, that can only be performed by the FUSE
> > daemon.
> >
>
> Ext4 supports inline encryption. Btrfs supports encrypted/compressed extents.
> No reason for FUSE not to support the same.
> Is it trivial? No.
> Is it an excuse for not using FUSE and writing a new userspace fs. Not
> in my option.
I started exploring the FUSE internals only in the last months and had the
feeling this compression feature was a bit out of context or at least very
use-case specific. But I'll start thinking about this.
>
> > The specific use case I'm trying to improve with this FUSE passthrough
> > series is instead related to the scoped storage feature that we introduced
> > in Android 11, that is based on FUSE, and affects those folders that are
> > shared among all the Apps (e.g., DCIM, Downloads, etc). More details here:
> >
>
> sdcard fs has had a lot of reincarnations :-)
>
> I for one am happy with the return to FUSE.
> Instead of saying "FUSE is too slow" and implementing a kernel sdcardfs,
> improve FUSE to be faster for everyone - that's the way to go ;-)
Yes! This is exactly what we are trying to achieve and this patch-set is
the first piece of a bigger picture which, among others, includes the
direct directory access that you mentioned before.
I hope the community appreciates these improvement attempts :)
As you may have noticed, I recently shared the v9 of the patch-set.
Thanks to the feedback I received, what we have now has a completely
different than the initial proposal.
Thanks again everyone for the suggestions!
Alessio
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH V8 2/3] fuse: Introduce synchronous read and write for passthrough
2020-09-11 16:34 [PATCH V8 0/3] fuse: Add support for passthrough read/write Alessio Balsini
2020-09-11 16:34 ` [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough Alessio Balsini
@ 2020-09-11 16:34 ` Alessio Balsini
2020-09-12 9:55 ` Amir Goldstein
2020-09-11 16:34 ` [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough Alessio Balsini
2020-09-11 18:46 ` [fuse-devel] [PATCH V8 0/3] fuse: Add support for passthrough read/write Antonio SJ Musumeci
3 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-11 16:34 UTC (permalink / raw)
To: Miklos Szeredi
Cc: Akilesh Kailash, Amir Goldstein, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
All the read and write operations performed on fuse_files which have the
passthrough feature enabled are forwarded to the associated lower file
system file.
Sending the request directly to the lower file system avoids the userspace
round-trip that, because of possible context switches and additional
operations might reduce the overall performance, especially in those cases
where caching doesn't help, for example in reads at random offsets.
If a fuse_file has a lower file system file associated for passthrough can
be verified by checking the validity of its passthrough_filp pointer, which
is not null only passthrough has been successfully enabled via the
appropriate ioctl(). When a read/write operation is requested for a FUSE
file with passthrough enabled, the request is directly forwarded to the
corresponding file_operations of the lower file system file. After the
read/write operation is completed, the file stats change is notified (and
propagated) to the lower file system.
This change only implements synchronous requests in passthrough, returning
an error in the case of ansynchronous operations, yet covering the majority
of the use cases.
Signed-off-by: Alessio Balsini <balsini@android.com>
---
fs/fuse/file.c | 8 +++--
fs/fuse/fuse_i.h | 2 ++
fs/fuse/passthrough.c | 81 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6c0ec742ce74..c3289ff0cd33 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1552,7 +1552,9 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (is_bad_inode(file_inode(file)))
return -EIO;
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ if (ff->passthrough_filp)
+ return fuse_passthrough_read_iter(iocb, to);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_read_iter(iocb, to);
else
return fuse_direct_read_iter(iocb, to);
@@ -1566,7 +1568,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (is_bad_inode(file_inode(file)))
return -EIO;
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ if (ff->passthrough_filp)
+ return fuse_passthrough_write_iter(iocb, from);
+ else if (!(ff->open_flags & FOPEN_DIRECT_IO))
return fuse_cache_write_iter(iocb, from);
else
return fuse_direct_write_iter(iocb, from);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6c5166447905..21ba30a6a661 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1106,5 +1106,7 @@ void fuse_free_conn(struct fuse_conn *fc);
int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd);
void fuse_passthrough_release(struct fuse_file *ff);
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 86ab4eafa7bf..44a78e02f45d 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -2,6 +2,87 @@
#include "fuse_i.h"
+#include <linux/fs_stack.h>
+#include <linux/fsnotify.h>
+#include <linux/uio.h>
+
+static void fuse_copyattr(struct file *dst_file, struct file *src_file,
+ bool write)
+{
+ if (write) {
+ struct inode *dst = file_inode(dst_file);
+ struct inode *src = file_inode(src_file);
+
+ fsnotify_modify(src_file);
+ fsstack_copy_inode_size(dst, src);
+ } else {
+ fsnotify_access(src_file);
+ }
+}
+
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct file *passthrough_filp = ff->passthrough_filp;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ if (is_sync_kiocb(iocb_fuse)) {
+ struct kiocb iocb;
+
+ kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
+ ret = call_read_iter(passthrough_filp, &iocb, iter);
+ iocb_fuse->ki_pos = iocb.ki_pos;
+ if (ret >= 0)
+ fuse_copyattr(fuse_filp, passthrough_filp, false);
+
+ } else {
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ struct file *fuse_filp = iocb_fuse->ki_filp;
+ struct fuse_file *ff = fuse_filp->private_data;
+ struct inode *fuse_inode = file_inode(fuse_filp);
+ struct file *passthrough_filp = ff->passthrough_filp;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ inode_lock(fuse_inode);
+
+ if (is_sync_kiocb(iocb_fuse)) {
+ struct kiocb iocb;
+
+ kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
+
+ file_start_write(passthrough_filp);
+ ret = call_write_iter(passthrough_filp, &iocb, iter);
+ file_end_write(passthrough_filp);
+
+ iocb_fuse->ki_pos = iocb.ki_pos;
+ if (ret > 0)
+ fuse_copyattr(fuse_filp, passthrough_filp, true);
+ } else {
+ ret = -EIO;
+ }
+
+ inode_unlock(fuse_inode);
+
+ return ret;
+}
+
int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd)
{
int ret;
--
2.28.0.618.gf4bc123cb7-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH V8 2/3] fuse: Introduce synchronous read and write for passthrough
2020-09-11 16:34 ` [PATCH V8 2/3] fuse: Introduce synchronous read and write " Alessio Balsini
@ 2020-09-12 9:55 ` Amir Goldstein
2020-09-21 11:01 ` Alessio Balsini
0 siblings, 1 reply; 17+ messages in thread
From: Amir Goldstein @ 2020-09-12 9:55 UTC (permalink / raw)
To: Alessio Balsini
Cc: Miklos Szeredi, Akilesh Kailash, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
>
> All the read and write operations performed on fuse_files which have the
> passthrough feature enabled are forwarded to the associated lower file
> system file.
>
> Sending the request directly to the lower file system avoids the userspace
> round-trip that, because of possible context switches and additional
> operations might reduce the overall performance, especially in those cases
> where caching doesn't help, for example in reads at random offsets.
>
> If a fuse_file has a lower file system file associated for passthrough can
> be verified by checking the validity of its passthrough_filp pointer, which
> is not null only passthrough has been successfully enabled via the
> appropriate ioctl(). When a read/write operation is requested for a FUSE
> file with passthrough enabled, the request is directly forwarded to the
> corresponding file_operations of the lower file system file. After the
> read/write operation is completed, the file stats change is notified (and
> propagated) to the lower file system.
>
> This change only implements synchronous requests in passthrough, returning
> an error in the case of ansynchronous operations, yet covering the majority
> of the use cases.
>
> Signed-off-by: Alessio Balsini <balsini@android.com>
> ---
> fs/fuse/file.c | 8 +++--
> fs/fuse/fuse_i.h | 2 ++
> fs/fuse/passthrough.c | 81 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 89 insertions(+), 2 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 6c0ec742ce74..c3289ff0cd33 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1552,7 +1552,9 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> if (is_bad_inode(file_inode(file)))
> return -EIO;
>
> - if (!(ff->open_flags & FOPEN_DIRECT_IO))
> + if (ff->passthrough_filp)
> + return fuse_passthrough_read_iter(iocb, to);
> + else if (!(ff->open_flags & FOPEN_DIRECT_IO))
> return fuse_cache_read_iter(iocb, to);
> else
> return fuse_direct_read_iter(iocb, to);
> @@ -1566,7 +1568,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> if (is_bad_inode(file_inode(file)))
> return -EIO;
>
> - if (!(ff->open_flags & FOPEN_DIRECT_IO))
> + if (ff->passthrough_filp)
> + return fuse_passthrough_write_iter(iocb, from);
> + else if (!(ff->open_flags & FOPEN_DIRECT_IO))
> return fuse_cache_write_iter(iocb, from);
> else
> return fuse_direct_write_iter(iocb, from);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 6c5166447905..21ba30a6a661 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1106,5 +1106,7 @@ void fuse_free_conn(struct fuse_conn *fc);
>
> int fuse_passthrough_setup(struct fuse_req *req, unsigned int fd);
> void fuse_passthrough_release(struct fuse_file *ff);
> +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to);
> +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from);
>
> #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 86ab4eafa7bf..44a78e02f45d 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -2,6 +2,87 @@
>
> #include "fuse_i.h"
>
> +#include <linux/fs_stack.h>
> +#include <linux/fsnotify.h>
> +#include <linux/uio.h>
> +
> +static void fuse_copyattr(struct file *dst_file, struct file *src_file,
> + bool write)
> +{
> + if (write) {
> + struct inode *dst = file_inode(dst_file);
> + struct inode *src = file_inode(src_file);
> +
> + fsnotify_modify(src_file);
> + fsstack_copy_inode_size(dst, src);
> + } else {
> + fsnotify_access(src_file);
> + }
> +}
> +
> +
> +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
> + struct iov_iter *iter)
> +{
> + ssize_t ret;
> + struct file *fuse_filp = iocb_fuse->ki_filp;
> + struct fuse_file *ff = fuse_filp->private_data;
> + struct file *passthrough_filp = ff->passthrough_filp;
> +
> + if (!iov_iter_count(iter))
> + return 0;
> +
> + if (is_sync_kiocb(iocb_fuse)) {
> + struct kiocb iocb;
> +
> + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> + ret = call_read_iter(passthrough_filp, &iocb, iter);
> + iocb_fuse->ki_pos = iocb.ki_pos;
> + if (ret >= 0)
> + fuse_copyattr(fuse_filp, passthrough_filp, false);
> +
> + } else {
> + ret = -EIO;
> + }
> +
> + return ret;
> +}
> +
> +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
> + struct iov_iter *iter)
> +{
> + ssize_t ret;
> + struct file *fuse_filp = iocb_fuse->ki_filp;
> + struct fuse_file *ff = fuse_filp->private_data;
> + struct inode *fuse_inode = file_inode(fuse_filp);
> + struct file *passthrough_filp = ff->passthrough_filp;
> +
> + if (!iov_iter_count(iter))
> + return 0;
> +
> + inode_lock(fuse_inode);
> +
> + if (is_sync_kiocb(iocb_fuse)) {
> + struct kiocb iocb;
> +
> + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> +
> + file_start_write(passthrough_filp);
> + ret = call_write_iter(passthrough_filp, &iocb, iter);
Why not vfs_iter_write()/vfs_iter_read()?
You are bypassing many internal VFS checks that seem pretty important.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 2/3] fuse: Introduce synchronous read and write for passthrough
2020-09-12 9:55 ` Amir Goldstein
@ 2020-09-21 11:01 ` Alessio Balsini
2020-09-21 13:07 ` Amir Goldstein
0 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-21 11:01 UTC (permalink / raw)
To: Amir Goldstein
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, David Anderson,
Eric Yan, Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
Hi Amir,
On Sat, Sep 12, 2020 at 12:55:35PM +0300, Amir Goldstein wrote:
> On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
> > +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
> > + struct iov_iter *iter)
> > +{
> > + ssize_t ret;
> > + struct file *fuse_filp = iocb_fuse->ki_filp;
> > + struct fuse_file *ff = fuse_filp->private_data;
> > + struct file *passthrough_filp = ff->passthrough_filp;
> > +
> > + if (!iov_iter_count(iter))
> > + return 0;
> > +
> > + if (is_sync_kiocb(iocb_fuse)) {
> > + struct kiocb iocb;
> > +
> > + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> > + ret = call_read_iter(passthrough_filp, &iocb, iter);
> > + iocb_fuse->ki_pos = iocb.ki_pos;
> > + if (ret >= 0)
> > + fuse_copyattr(fuse_filp, passthrough_filp, false);
> > +
> > + } else {
> > + ret = -EIO;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
> > + struct iov_iter *iter)
> > +{
> > + ssize_t ret;
> > + struct file *fuse_filp = iocb_fuse->ki_filp;
> > + struct fuse_file *ff = fuse_filp->private_data;
> > + struct inode *fuse_inode = file_inode(fuse_filp);
> > + struct file *passthrough_filp = ff->passthrough_filp;
> > +
> > + if (!iov_iter_count(iter))
> > + return 0;
> > +
> > + inode_lock(fuse_inode);
> > +
> > + if (is_sync_kiocb(iocb_fuse)) {
> > + struct kiocb iocb;
> > +
> > + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> > +
> > + file_start_write(passthrough_filp);
> > + ret = call_write_iter(passthrough_filp, &iocb, iter);
>
> Why not vfs_iter_write()/vfs_iter_read()?
>
> You are bypassing many internal VFS checks that seem pretty important.
>
I've been thinking a lot about this and decided to go for the VFS bypassing
solution because:
1. it looked odd to me to perform VFS checks twice, both for FUSE and lower
FS and it seemed to me that we found a tradeoff with Jann about doing
this lower FS call, and
2. in our Android use case (I just saw you asking for more details about
this in, I'll reply on the other thread), the user might have the right
credentials to access the FUSE file system, but not to access the lower
file system, so the VFS checkings would fail. So that would have created
the need for a credential bypassing that looked hacky.
But I agree and I would probably sleep better knowing that VFS checks are
not skipped :) So I decided to implemented the vfs_iter_{read,write}()
variant.
I again picked a lot from the overlayfs solution. In a few words, I get the
FUSE daemon credential reference at FUSE FS creation time and, when
passthrough read/write operations are triggered, the kernel temporarily
overrides the requesting process' credentials with those of the FUSE
daemon. Credentials are reverted as soon as the operation completes.
I have a temporary development branch where I'm developing the V9 of this
patch, plus the VFS variant (git history may change):
https://github.com/balsini/linux/tree/fuse-passthrough-stable-v5.8-v9-vfs
For now, I'm happy to say that I like this VFS solution, it also simplified
the lower file system notifications, so I'll probably go with this in the
V9.
Thanks again Amir, much appreciated.
Alessio
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 2/3] fuse: Introduce synchronous read and write for passthrough
2020-09-21 11:01 ` Alessio Balsini
@ 2020-09-21 13:07 ` Amir Goldstein
0 siblings, 0 replies; 17+ messages in thread
From: Amir Goldstein @ 2020-09-21 13:07 UTC (permalink / raw)
To: Alessio Balsini
Cc: Miklos Szeredi, Akilesh Kailash, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On Mon, Sep 21, 2020 at 2:01 PM Alessio Balsini <balsini@android.com> wrote:
>
> Hi Amir,
>
> On Sat, Sep 12, 2020 at 12:55:35PM +0300, Amir Goldstein wrote:
> > On Fri, Sep 11, 2020 at 7:34 PM Alessio Balsini <balsini@android.com> wrote:
> > > +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
> > > + struct iov_iter *iter)
> > > +{
> > > + ssize_t ret;
> > > + struct file *fuse_filp = iocb_fuse->ki_filp;
> > > + struct fuse_file *ff = fuse_filp->private_data;
> > > + struct file *passthrough_filp = ff->passthrough_filp;
> > > +
> > > + if (!iov_iter_count(iter))
> > > + return 0;
> > > +
> > > + if (is_sync_kiocb(iocb_fuse)) {
> > > + struct kiocb iocb;
> > > +
> > > + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> > > + ret = call_read_iter(passthrough_filp, &iocb, iter);
> > > + iocb_fuse->ki_pos = iocb.ki_pos;
> > > + if (ret >= 0)
> > > + fuse_copyattr(fuse_filp, passthrough_filp, false);
> > > +
> > > + } else {
> > > + ret = -EIO;
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
> > > + struct iov_iter *iter)
> > > +{
> > > + ssize_t ret;
> > > + struct file *fuse_filp = iocb_fuse->ki_filp;
> > > + struct fuse_file *ff = fuse_filp->private_data;
> > > + struct inode *fuse_inode = file_inode(fuse_filp);
> > > + struct file *passthrough_filp = ff->passthrough_filp;
> > > +
> > > + if (!iov_iter_count(iter))
> > > + return 0;
> > > +
> > > + inode_lock(fuse_inode);
> > > +
> > > + if (is_sync_kiocb(iocb_fuse)) {
> > > + struct kiocb iocb;
> > > +
> > > + kiocb_clone(&iocb, iocb_fuse, passthrough_filp);
> > > +
> > > + file_start_write(passthrough_filp);
> > > + ret = call_write_iter(passthrough_filp, &iocb, iter);
> >
> > Why not vfs_iter_write()/vfs_iter_read()?
> >
> > You are bypassing many internal VFS checks that seem pretty important.
> >
>
> I've been thinking a lot about this and decided to go for the VFS bypassing
> solution because:
> 1. it looked odd to me to perform VFS checks twice, both for FUSE and lower
> FS and it seemed to me that we found a tradeoff with Jann about doing
> this lower FS call, and
> 2. in our Android use case (I just saw you asking for more details about
> this in, I'll reply on the other thread), the user might have the right
> credentials to access the FUSE file system, but not to access the lower
> file system, so the VFS checkings would fail. So that would have created
> the need for a credential bypassing that looked hacky.
>
> But I agree and I would probably sleep better knowing that VFS checks are
> not skipped :) So I decided to implemented the vfs_iter_{read,write}()
> variant.
>
> I again picked a lot from the overlayfs solution. In a few words, I get the
> FUSE daemon credential reference at FUSE FS creation time and, when
> passthrough read/write operations are triggered, the kernel temporarily
> overrides the requesting process' credentials with those of the FUSE
> daemon. Credentials are reverted as soon as the operation completes.
>
> I have a temporary development branch where I'm developing the V9 of this
> patch, plus the VFS variant (git history may change):
>
> https://github.com/balsini/linux/tree/fuse-passthrough-stable-v5.8-v9-vfs
>
> For now, I'm happy to say that I like this VFS solution, it also simplified
> the lower file system notifications, so I'll probably go with this in the
> V9.
>
I am happy this direction was workable, not because the overlayfs solution
is perfect, but because it already has decent mileage running into strange
corner cases and fixing them.
But I also think it is better when fuse driver performs actions on behalf
of the server, that it uses the server's credential, because this way,
the passthrough fd code path behaves logically closer to the non-passthrough
code, only (hopefully) faster.
Thanks,
Amir.
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough
2020-09-11 16:34 [PATCH V8 0/3] fuse: Add support for passthrough read/write Alessio Balsini
2020-09-11 16:34 ` [PATCH V8 1/3] fuse: Definitions and ioctl() for passthrough Alessio Balsini
2020-09-11 16:34 ` [PATCH V8 2/3] fuse: Introduce synchronous read and write " Alessio Balsini
@ 2020-09-11 16:34 ` Alessio Balsini
2020-09-11 17:23 ` Jens Axboe
2020-09-11 18:46 ` [fuse-devel] [PATCH V8 0/3] fuse: Add support for passthrough read/write Antonio SJ Musumeci
3 siblings, 1 reply; 17+ messages in thread
From: Alessio Balsini @ 2020-09-11 16:34 UTC (permalink / raw)
To: Miklos Szeredi
Cc: Akilesh Kailash, Amir Goldstein, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
Extend the passthrough feature by handling asynchronous IO both for read
and write operations.
When an AIO request is received, targeting a FUSE file with passthrough
functionality enabled, a new identical AIO request is created, the file
pointer of which is updated with the file pointer of the lower file system,
and the completion handler is set with a special AIO passthrough handler.
The lower file system AIO request is allocated in dynamic kernel memory
and, when it completes, the allocated memory is freed and the completion
signal is propagated to the FUSE AIO request by triggering its completion
callback as well.
Signed-off-by: Alessio Balsini <balsini@android.com>
---
fs/fuse/passthrough.c | 66 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index 44a78e02f45d..87b57b26fd8a 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -2,10 +2,16 @@
#include "fuse_i.h"
+#include <linux/aio.h>
#include <linux/fs_stack.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>
+struct fuse_aio_req {
+ struct kiocb iocb;
+ struct kiocb *iocb_fuse;
+};
+
static void fuse_copyattr(struct file *dst_file, struct file *src_file,
bool write)
{
@@ -20,6 +26,32 @@ static void fuse_copyattr(struct file *dst_file, struct file *src_file,
}
}
+static void fuse_aio_cleanup_handler(struct fuse_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+ bool write = !!(iocb->ki_flags & IOCB_WRITE);
+
+ if (write) {
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
+ file_end_write(iocb->ki_filp);
+ }
+
+ fuse_copyattr(iocb_fuse->ki_filp, iocb->ki_filp, write);
+ iocb_fuse->ki_pos = iocb->ki_pos;
+ kfree(aio_req);
+}
+
+static void fuse_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct fuse_aio_req *aio_req =
+ container_of(iocb, struct fuse_aio_req, iocb);
+ struct kiocb *iocb_fuse = aio_req->iocb_fuse;
+
+ fuse_aio_cleanup_handler(aio_req);
+ iocb_fuse->ki_complete(iocb_fuse, res, res2);
+}
ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
struct iov_iter *iter)
@@ -42,7 +74,18 @@ ssize_t fuse_passthrough_read_iter(struct kiocb *iocb_fuse,
fuse_copyattr(fuse_filp, passthrough_filp, false);
} else {
- ret = -EIO;
+ struct fuse_aio_req *aio_req;
+
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req)
+ return -ENOMEM;
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_read_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
}
return ret;
@@ -56,6 +99,7 @@ ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
struct fuse_file *ff = fuse_filp->private_data;
struct inode *fuse_inode = file_inode(fuse_filp);
struct file *passthrough_filp = ff->passthrough_filp;
+ struct inode *passthrough_inode = file_inode(passthrough_filp);
if (!iov_iter_count(iter))
return 0;
@@ -75,9 +119,25 @@ ssize_t fuse_passthrough_write_iter(struct kiocb *iocb_fuse,
if (ret > 0)
fuse_copyattr(fuse_filp, passthrough_filp, true);
} else {
- ret = -EIO;
- }
+ struct fuse_aio_req *aio_req;
+ aio_req = kmalloc(sizeof(struct fuse_aio_req), GFP_KERNEL);
+ if (!aio_req) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ file_start_write(passthrough_filp);
+ __sb_writers_release(passthrough_inode->i_sb, SB_FREEZE_WRITE);
+
+ aio_req->iocb_fuse = iocb_fuse;
+ kiocb_clone(&aio_req->iocb, iocb_fuse, passthrough_filp);
+ aio_req->iocb.ki_complete = fuse_aio_rw_complete;
+ ret = call_write_iter(passthrough_filp, &aio_req->iocb, iter);
+ if (ret != -EIOCBQUEUED)
+ fuse_aio_cleanup_handler(aio_req);
+ }
+out:
inode_unlock(fuse_inode);
return ret;
--
2.28.0.618.gf4bc123cb7-goog
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough
2020-09-11 16:34 ` [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough Alessio Balsini
@ 2020-09-11 17:23 ` Jens Axboe
2020-09-21 15:28 ` Alessio Balsini
0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2020-09-11 17:23 UTC (permalink / raw)
To: Alessio Balsini, Miklos Szeredi
Cc: Akilesh Kailash, Amir Goldstein, David Anderson, Eric Yan,
Jann Horn, Martijn Coenen, Palmer Dabbelt, Paul Lawrence,
Stefano Duo, Zimuzo Ezeozue, fuse-devel, kernel-team,
linux-fsdevel, linux-kernel
On 9/11/20 10:34 AM, Alessio Balsini wrote:
> Extend the passthrough feature by handling asynchronous IO both for read
> and write operations.
> When an AIO request is received, targeting a FUSE file with passthrough
> functionality enabled, a new identical AIO request is created, the file
> pointer of which is updated with the file pointer of the lower file system,
> and the completion handler is set with a special AIO passthrough handler.
> The lower file system AIO request is allocated in dynamic kernel memory
> and, when it completes, the allocated memory is freed and the completion
> signal is propagated to the FUSE AIO request by triggering its completion
> callback as well.
>
> Signed-off-by: Alessio Balsini <balsini@android.com>
> ---
> fs/fuse/passthrough.c | 66 +++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 63 insertions(+), 3 deletions(-)
>
> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> index 44a78e02f45d..87b57b26fd8a 100644
> --- a/fs/fuse/passthrough.c
> +++ b/fs/fuse/passthrough.c
> @@ -2,10 +2,16 @@
>
> #include "fuse_i.h"
>
> +#include <linux/aio.h>
What is this include for? It's not using any aio parts at all.
--
Jens Axboe
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough
2020-09-11 17:23 ` Jens Axboe
@ 2020-09-21 15:28 ` Alessio Balsini
0 siblings, 0 replies; 17+ messages in thread
From: Alessio Balsini @ 2020-09-21 15:28 UTC (permalink / raw)
To: Jens Axboe
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, Amir Goldstein,
David Anderson, Eric Yan, Jann Horn, Martijn Coenen,
Palmer Dabbelt, Paul Lawrence, Stefano Duo, Zimuzo Ezeozue,
fuse-devel, kernel-team, linux-fsdevel, linux-kernel
On Fri, Sep 11, 2020 at 11:23:22AM -0600, Jens Axboe wrote:
> On 9/11/20 10:34 AM, Alessio Balsini wrote:
> > Extend the passthrough feature by handling asynchronous IO both for read
> > and write operations.
> > When an AIO request is received, targeting a FUSE file with passthrough
> > functionality enabled, a new identical AIO request is created, the file
> > pointer of which is updated with the file pointer of the lower file system,
> > and the completion handler is set with a special AIO passthrough handler.
> > The lower file system AIO request is allocated in dynamic kernel memory
> > and, when it completes, the allocated memory is freed and the completion
> > signal is propagated to the FUSE AIO request by triggering its completion
> > callback as well.
> >
> > Signed-off-by: Alessio Balsini <balsini@android.com>
> > ---
> > fs/fuse/passthrough.c | 66 +++++++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 63 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
> > index 44a78e02f45d..87b57b26fd8a 100644
> > --- a/fs/fuse/passthrough.c
> > +++ b/fs/fuse/passthrough.c
> > @@ -2,10 +2,16 @@
> >
> > #include "fuse_i.h"
> >
> > +#include <linux/aio.h>
>
> What is this include for? It's not using any aio parts at all.
>
> --
> Jens Axboe
>
Slipped from a cleanup. Fixed.
Thanks!
Alessio
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [fuse-devel] [PATCH V8 0/3] fuse: Add support for passthrough read/write
2020-09-11 16:34 [PATCH V8 0/3] fuse: Add support for passthrough read/write Alessio Balsini
` (2 preceding siblings ...)
2020-09-11 16:34 ` [PATCH V8 3/3] fuse: Handle AIO read and write in passthrough Alessio Balsini
@ 2020-09-11 18:46 ` Antonio SJ Musumeci
2020-09-18 16:03 ` Alessio Balsini
3 siblings, 1 reply; 17+ messages in thread
From: Antonio SJ Musumeci @ 2020-09-11 18:46 UTC (permalink / raw)
To: Alessio Balsini, Miklos Szeredi
Cc: Akilesh Kailash, Amir Goldstein, David Anderson, Eric Yan,
Jann Horn, Jens Axboe, Martijn Coenen, Palmer Dabbelt,
Paul Lawrence, Stefano Duo, Zimuzo Ezeozue, fuse-devel,
kernel-team, linux-fsdevel, linux-kernel
On 9/11/2020 12:34 PM, Alessio Balsini via fuse-devel wrote:
> Add support for file system passthrough read/write of files when enabled in
> userspace through the option FUSE_PASSTHROUGH.
Might be more effort than it is worth but any thoughts on userland error
handling for passthrough? My use case, optionally, responds to read or
write errors in particular ways. It's not an unreasonable tradeoff to
disable passthrough if the user wants those features but was wondering
if there was any consideration of extending the protocol to pass
read/write errors back to the fuse server.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [fuse-devel] [PATCH V8 0/3] fuse: Add support for passthrough read/write
2020-09-11 18:46 ` [fuse-devel] [PATCH V8 0/3] fuse: Add support for passthrough read/write Antonio SJ Musumeci
@ 2020-09-18 16:03 ` Alessio Balsini
0 siblings, 0 replies; 17+ messages in thread
From: Alessio Balsini @ 2020-09-18 16:03 UTC (permalink / raw)
To: Antonio SJ Musumeci
Cc: Alessio Balsini, Miklos Szeredi, Akilesh Kailash, Amir Goldstein,
David Anderson, Eric Yan, Jann Horn, Jens Axboe, Martijn Coenen,
Palmer Dabbelt, Paul Lawrence, Stefano Duo, Zimuzo Ezeozue,
fuse-devel, kernel-team, linux-fsdevel, linux-kernel
Hi Antonio,
It's indeed a great idea to notify the FUSE daemon in case of lower file
system exceptions, otherwise transparent when passthrough is enabled.
I was already planning to work on this feature as a future extension, glad
to see that this is of interest to the community.
Thanks for your feedback,
Alessio
On Fri, Sep 11, 2020 at 02:46:04PM -0400, Antonio SJ Musumeci wrote:
> On 9/11/2020 12:34 PM, Alessio Balsini via fuse-devel wrote:
> > Add support for file system passthrough read/write of files when enabled in
> > userspace through the option FUSE_PASSTHROUGH.
> Might be more effort than it is worth but any thoughts on userland error
> handling for passthrough? My use case, optionally, responds to read or write
> errors in particular ways. It's not an unreasonable tradeoff to disable
> passthrough if the user wants those features but was wondering if there was
> any consideration of extending the protocol to pass read/write errors back
> to the fuse server.
^ permalink raw reply [flat|nested] 17+ messages in thread