linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fuse: Add support for fuse stacked I/O
       [not found] <565394BE.4040506@codeaurora.org>
@ 2016-01-13 23:53 ` Nikhilesh Reddy
  2016-01-14  4:57   ` Greg KH
                     ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-13 23:53 UTC (permalink / raw)
  To: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, gregkh,
	torvalds, linux-fsdevel, viro, Richard Weinberger,
	Theodore Ts'o, jack, Antonio SJ Musumeci, sven.utcke,
	Nikolaus Rath

Add support for filesystem stacked read/write of files
when enabled through a userspace init option of FUSE_STACKED_IO.

When FUSE_STACKED_IO is enabled all the reads and writes
to the fuse mount point go directly to the native filesystem
rather than through the fuse daemon. All requests that aren't
read/write still go thought the userspace code.

Mmaped I/O is still not supported through stacking and can be
added in.

This allows for significantly better performance on read and writes.
The difference in performance  between fuse and the native lower
filesystem is negligible.

There is also a significant cpu/power savings that is achieved which
is really important on embedded systems that use fuse for I/O.

Signed-off-by: Nikhilesh Reddy <reddyn@codeaurora.org>
---
  fs/fuse/Makefile          |   2 +-
  fs/fuse/dev.c             |   4 ++
  fs/fuse/dir.c             |   3 ++
  fs/fuse/file.c            |  37 +++++++++++++--
  fs/fuse/fuse_i.h          |  10 ++++
  fs/fuse/fuse_stacked.h    |  31 +++++++++++++
  fs/fuse/inode.c           |   5 ++
  fs/fuse/stacked_io.c      | 113
++++++++++++++++++++++++++++++++++++++++++++++
  include/uapi/linux/fuse.h |   3 +-
  9 files changed, 202 insertions(+), 6 deletions(-)
  create mode 100644 fs/fuse/fuse_stacked.h
  create mode 100644 fs/fuse/stacked_io.c

diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index e95eeb4..d4f3a26 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
  obj-$(CONFIG_FUSE_FS) += fuse.o
  obj-$(CONFIG_CUSE) += cuse.o

-fuse-objs := dev.o dir.o file.o inode.o control.o
+fuse-objs := dev.o dir.o file.o inode.o control.o stacked_io.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ebb5e37..ccef1a2 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -7,6 +7,7 @@
  */

  #include "fuse_i.h"
+#include "fuse_stacked.h"

  #include <linux/init.h>
  #include <linux/module.h>
@@ -569,6 +570,8 @@ ssize_t fuse_simple_request(struct fuse_conn *fc,
struct fuse_args *args)
  	if (!ret && args->out.argvar) {
  		BUG_ON(args->out.numargs != 1);
  		ret = req->out.args[0].size;
+		if (req->private_lower_rw_file != NULL)
+			args->out.lower_filp = req->private_lower_rw_file;
  	}
  	fuse_put_request(fc, req);

@@ -1934,6 +1937,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
  	err = copy_out_args(cs, &req->out, nbytes);
  	fuse_copy_finish(cs);

+	fuse_setup_stacked_io(fc, req);
  	spin_lock(&fpq->lock);
  	clear_bit(FR_LOCKED, &req->flags);
  	if (!fpq->connected)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5e2e087..856a2e4 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -428,6 +428,7 @@ static int fuse_create_open(struct inode *dir,
struct dentry *entry,
  	args.out.args[0].value = &outentry;
  	args.out.args[1].size = sizeof(outopen);
  	args.out.args[1].value = &outopen;
+	args.out.lower_filp = NULL;
  	err = fuse_simple_request(fc, &args);
  	if (err)
  		goto out_free_ff;
@@ -439,6 +440,8 @@ static int fuse_create_open(struct inode *dir,
struct dentry *entry,
  	ff->fh = outopen.fh;
  	ff->nodeid = outentry.nodeid;
  	ff->open_flags = outopen.open_flags;
+	if (args.out.lower_filp != NULL)
+		ff->rw_lower_file = args.out.lower_filp;
  	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
  			  &outentry.attr, entry_attr_timeout(&outentry), 0);
  	if (!inode) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 47f1811..80b1ad7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -7,6 +7,7 @@
  */

  #include "fuse_i.h"
+#include "fuse_stacked.h"

  #include <linux/pagemap.h>
  #include <linux/slab.h>
@@ -21,8 +22,10 @@
  static const struct file_operations fuse_direct_io_file_operations;

  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct
file *file,
-			  int opcode, struct fuse_open_out *outargp)
+			  int opcode, struct fuse_open_out *outargp,
+			  struct file **lower_filepp)
  {
+	int ret_val;
  	struct fuse_open_in inarg;
  	FUSE_ARGS(args);

@@ -38,8 +41,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64
nodeid, struct file *file,
  	args.out.numargs = 1;
  	args.out.args[0].size = sizeof(*outargp);
  	args.out.args[0].value = outargp;
+	args.out.lower_filp = NULL;

-	return fuse_simple_request(fc, &args);
+	ret_val = fuse_simple_request(fc, &args);
+
+	if (args.out.lower_filp != NULL)
+		*lower_filepp = args.out.lower_filp;
+
+	return ret_val;
  }

  struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -50,6 +59,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
  	if (unlikely(!ff))
  		return NULL;

+	ff->rw_lower_file = NULL;
  	ff->fc = fc;
  	ff->reserved_req = fuse_request_alloc(0);
  	if (unlikely(!ff->reserved_req)) {
@@ -117,6 +127,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid,
struct file *file,
  		 bool isdir)
  {
  	struct fuse_file *ff;
+	struct file *lower_file;
  	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;

  	ff = fuse_file_alloc(fc);
@@ -129,10 +140,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid,
struct file *file,
  		struct fuse_open_out outarg;
  		int err;

-		err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+		err = fuse_send_open(fc, nodeid, file, opcode, &outarg,
+				     &(lower_file));
  		if (!err) {
  			ff->fh = outarg.fh;
  			ff->open_flags = outarg.open_flags;
+			ff->rw_lower_file = lower_file;

  		} else if (err != -ENOSYS || isdir) {
  			fuse_file_free(ff);
@@ -252,6 +265,8 @@ void fuse_release_common(struct file *file, int opcode)
  	if (unlikely(!ff))
  		return;

+	fuse_stacked_release(ff);
+
  	req = ff->reserved_req;
  	fuse_prepare_release(ff, file->f_flags, opcode);

@@ -896,8 +911,10 @@ out:

  static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter
*to)
  {
+	ssize_t ret_val;
  	struct inode *inode = iocb->ki_filp->f_mapping->host;
  	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_file *ff = iocb->ki_filp->private_data;

  	/*
  	 * In auto invalidate mode, always update attributes on read.
@@ -912,7 +929,12 @@ static ssize_t fuse_file_read_iter(struct kiocb
*iocb, struct iov_iter *to)
  			return err;
  	}

-	return generic_file_read_iter(iocb, to);
+	if (ff && ff->rw_lower_file)
+		ret_val = fuse_stacked_read_iter(iocb, to);
+	else
+		ret_val = generic_file_read_iter(iocb, to);
+
+	return ret_val;
  }

  static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
@@ -1144,6 +1166,7 @@ static ssize_t fuse_perform_write(struct file *file,
  static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct
iov_iter *from)
  {
  	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
  	struct address_space *mapping = file->f_mapping;
  	ssize_t written = 0;
  	ssize_t written_buffered = 0;
@@ -1177,8 +1200,14 @@ static ssize_t fuse_file_write_iter(struct kiocb
*iocb, struct iov_iter *from)
  	if (err)
  		goto out;

+	if (ff && ff->rw_lower_file) {
+		written = fuse_stacked_write_iter(iocb, from);
+		goto out;
+	}
+
  	if (iocb->ki_flags & IOCB_DIRECT) {
  		loff_t pos = iocb->ki_pos;
+
  		written = generic_file_direct_write(iocb, from, pos);
  		if (written < 0 || !iov_iter_count(from))
  			goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ce394b5..3b9d3e3 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -157,6 +157,9 @@ struct fuse_file {

  	/** Has flock been performed on this file? */
  	bool flock:1;
+
+	/* the read write file */
+	struct file *rw_lower_file;
  };

  /** One input argument of a request */
@@ -236,6 +239,7 @@ struct fuse_args {
  		unsigned argvar:1;
  		unsigned numargs;
  		struct fuse_arg args[2];
+		struct file *lower_filp;
  	} out;
  };

@@ -374,6 +378,9 @@ struct fuse_req {

  	/** Request is stolen from fuse_file->reserved_req */
  	struct file *stolen_file;
+
+	/** fuse stacked_io lower file  */
+	struct file *private_lower_rw_file;
  };

  struct fuse_iqueue {
@@ -531,6 +538,9 @@ struct fuse_conn {
  	/** write-back cache policy (default is write-through) */
  	unsigned writeback_cache:1;

+	/** Stacked IO. */
+	unsigned stacked_io:1;
+
  	/*
  	 * The following bitfields are only for optimization purposes
  	 * and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/fuse_stacked.h b/fs/fuse/fuse_stacked.h
new file mode 100644
index 0000000..e56d7be
--- /dev/null
+++ b/fs/fuse/fuse_stacked.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _FS_FUSE_STACKED_H
+#define _FS_FUSE_STACKED_H
+
+#include "fuse_i.h"
+
+#include <linux/fuse.h>
+#include <linux/file.h>
+
+void fuse_setup_stacked_io(struct fuse_conn *fc, struct fuse_req *req);
+
+ssize_t fuse_stacked_read_iter(struct kiocb *iocb, struct iov_iter *to);
+
+ssize_t fuse_stacked_write_iter(struct kiocb *iocb, struct iov_iter *from);
+
+void fuse_stacked_release(struct fuse_file *ff);
+
+#endif /* _FS_FUSE_STACKED_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2913db2..bfc4f3f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -898,6 +898,11 @@ static void process_init_reply(struct fuse_conn
*fc, struct fuse_req *req)
  				fc->async_dio = 1;
  			if (arg->flags & FUSE_WRITEBACK_CACHE)
  				fc->writeback_cache = 1;
+			if (arg->flags & FUSE_STACKED_IO) {
+				fc->stacked_io = 1;
+				pr_info("FUSE: Stacked io is enabled [%s : %d]!\n",
+					current->comm, current->pid);
+			}
  			if (arg->time_gran && arg->time_gran <= 1000000000)
  				fc->sb->s_time_gran = arg->time_gran;
  		} else {
diff --git a/fs/fuse/stacked_io.c b/fs/fuse/stacked_io.c
new file mode 100644
index 0000000..07a72302
--- /dev/null
+++ b/fs/fuse/stacked_io.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "fuse_stacked.h"
+
+#include <linux/aio.h>
+#include <linux/fs_stack.h>
+
+void fuse_setup_stacked_io(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int daemon_fd;
+	struct file *rw_lower_file = NULL;
+	struct fuse_open_out *open_out;
+	int open_out_index;
+
+	req->private_lower_rw_file = NULL;
+
+	if (!(fc->stacked_io))
+		return;
+
+	if ((req->in.h.opcode != FUSE_OPEN) &&
+	    (req->in.h.opcode != FUSE_CREATE))
+		return;
+
+	open_out_index = req->in.numargs - 1;
+
+	BUG_ON(open_out_index != 0 && open_out_index != 1);
+	BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out));
+
+	open_out = req->out.args[open_out_index].value;
+
+	daemon_fd = (int)open_out->lower_fd;
+	if (daemon_fd < 0)
+		return;
+
+	rw_lower_file = fget_raw(daemon_fd);
+	if (!rw_lower_file)
+		return;
+	req->private_lower_rw_file = rw_lower_file;
+}
+
+static ssize_t fuse_stacked_read_write_iter(struct kiocb *iocb,
+					    struct iov_iter *iter, int do_write)
+{
+	ssize_t ret_val;
+	struct fuse_file *ff;
+	struct file *fuse_file, *lower_file;
+	struct inode *fuse_inode, *lower_inode;
+
+	ff = iocb->ki_filp->private_data;
+	fuse_file = iocb->ki_filp;
+	lower_file = ff->rw_lower_file;
+
+	/* lock lower file to prevent it from being released */
+	get_file(lower_file);
+	iocb->ki_filp = lower_file;
+	fuse_inode = fuse_file->f_path.dentry->d_inode;
+	lower_inode = file_inode(lower_file);
+
+	if (do_write) {
+		if (!lower_file->f_op->write_iter)
+			return -EIO;
+		ret_val = lower_file->f_op->write_iter(iocb, iter);
+
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED) {
+			fsstack_copy_inode_size(fuse_inode, lower_inode);
+			fsstack_copy_attr_times(fuse_inode, lower_inode);
+		}
+	} else {
+		if (!lower_file->f_op->read_iter)
+			return -EIO;
+		ret_val = lower_file->f_op->read_iter(iocb, iter);
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED)
+			fsstack_copy_attr_atime(fuse_inode, lower_inode);
+	}
+
+	iocb->ki_filp = fuse_file;
+	fput(lower_file);
+	/* unlock lower file */
+
+	return ret_val;
+}
+
+ssize_t fuse_stacked_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	return fuse_stacked_read_write_iter(iocb, to, 0);
+}
+
+ssize_t fuse_stacked_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	return fuse_stacked_read_write_iter(iocb, from, 1);
+}
+
+void fuse_stacked_release(struct fuse_file *ff)
+{
+	if (!(ff->rw_lower_file))
+		return;
+
+	/* Release the lower file. */
+	fput(ff->rw_lower_file);
+	ff->rw_lower_file = NULL;
+}
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5974fae..16061d2 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -253,6 +253,7 @@ struct fuse_file_lock {
  #define FUSE_ASYNC_DIO		(1 << 15)
  #define FUSE_WRITEBACK_CACHE	(1 << 16)
  #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
+#define FUSE_STACKED_IO		(1 << 18)

  /**
   * CUSE INIT request/reply flags
@@ -484,7 +485,7 @@ struct fuse_create_in {
  struct fuse_open_out {
  	uint64_t	fh;
  	uint32_t	open_flags;
-	uint32_t	padding;
+	int32_t         lower_fd;/* lower layer file descriptor */
  };

  struct fuse_release_in {
-- 
1.8.2.1


-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-13 23:53 ` [PATCH] fuse: Add support for fuse stacked I/O Nikhilesh Reddy
@ 2016-01-14  4:57   ` Greg KH
  2016-01-14 18:57     ` Nikhilesh Reddy
  2016-01-14  8:25   ` Christoph Hellwig
  2016-01-19  3:07   ` Jann Horn
  2 siblings, 1 reply; 19+ messages in thread
From: Greg KH @ 2016-01-14  4:57 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, torvalds,
	linux-fsdevel, viro, Richard Weinberger, Theodore Ts'o, jack,
	Antonio SJ Musumeci, sven.utcke, Nikolaus Rath

On Wed, Jan 13, 2016 at 03:53:10PM -0800, Nikhilesh Reddy wrote:
> Add support for filesystem stacked read/write of files
> when enabled through a userspace init option of FUSE_STACKED_IO.
> 
> When FUSE_STACKED_IO is enabled all the reads and writes
> to the fuse mount point go directly to the native filesystem
> rather than through the fuse daemon. All requests that aren't
> read/write still go thought the userspace code.
> 
> Mmaped I/O is still not supported through stacking and can be
> added in.
> 
> This allows for significantly better performance on read and writes.
> The difference in performance  between fuse and the native lower
> filesystem is negligible.
> 
> There is also a significant cpu/power savings that is achieved which
> is really important on embedded systems that use fuse for I/O.
> 
> Signed-off-by: Nikhilesh Reddy <reddyn@codeaurora.org>
> ---
>  fs/fuse/Makefile          |   2 +-
>  fs/fuse/dev.c             |   4 ++
>  fs/fuse/dir.c             |   3 ++
>  fs/fuse/file.c            |  37 +++++++++++++--
>  fs/fuse/fuse_i.h          |  10 ++++
>  fs/fuse/fuse_stacked.h    |  31 +++++++++++++
>  fs/fuse/inode.c           |   5 ++
>  fs/fuse/stacked_io.c      | 113
> ++++++++++++++++++++++++++++++++++++++++++++++

Your patch is line-wrapped and impossible to apply.

And why cc: me?  This isn't my area of the kernel...

greg k-h

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-13 23:53 ` [PATCH] fuse: Add support for fuse stacked I/O Nikhilesh Reddy
  2016-01-14  4:57   ` Greg KH
@ 2016-01-14  8:25   ` Christoph Hellwig
  2016-01-14 15:52     ` Nikolaus Rath
  2016-01-19  3:07   ` Jann Horn
  2 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2016-01-14  8:25 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, gregkh,
	torvalds, linux-fsdevel, viro, Richard Weinberger,
	Theodore Ts'o, jack, Antonio SJ Musumeci, sven.utcke,
	Nikolaus Rath

On Wed, Jan 13, 2016 at 03:53:10PM -0800, Nikhilesh Reddy wrote:
> Add support for filesystem stacked read/write of files
> when enabled through a userspace init option of FUSE_STACKED_IO.
> 
> When FUSE_STACKED_IO is enabled all the reads and writes
> to the fuse mount point go directly to the native filesystem
> rather than through the fuse daemon. All requests that aren't
> read/write still go thought the userspace code.

Please write your stacked features as kernel drivers.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-14  8:25   ` Christoph Hellwig
@ 2016-01-14 15:52     ` Nikolaus Rath
  2016-01-15  9:29       ` Christoph Hellwig
  0 siblings, 1 reply; 19+ messages in thread
From: Nikolaus Rath @ 2016-01-14 15:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Nikhilesh Reddy, Miklos Szeredi, fuse-devel, linux-kernel,
	linux-api, gregkh, torvalds, linux-fsdevel, viro,
	Richard Weinberger, Theodore Ts'o, jack, Antonio SJ Musumeci,
	sven.utcke

On Jan 14 2016, Christoph Hellwig <hch@infradead.org> wrote:
> On Wed, Jan 13, 2016 at 03:53:10PM -0800, Nikhilesh Reddy wrote:
>> Add support for filesystem stacked read/write of files
>> when enabled through a userspace init option of FUSE_STACKED_IO.
>> 
>> When FUSE_STACKED_IO is enabled all the reads and writes
>> to the fuse mount point go directly to the native filesystem
>> rather than through the fuse daemon. All requests that aren't
>> read/write still go thought the userspace code.
>
> Please write your stacked features as kernel drivers.

Could you explain in more detail? I think I don't understand either the
why, the how, or what you mean with driver in this context (not sure
which one it is). 


(I'm not the original submitter but interested in this)


Best,
-Nikolaus

-- 
GPG encrypted emails preferred. Key id: 0xD113FCAC3C4E599F
Fingerprint: ED31 791B 2C5C 1613 AF38 8B8A D113 FCAC 3C4E 599F

             »Time flies like an arrow, fruit flies like a Banana.«

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-14  4:57   ` Greg KH
@ 2016-01-14 18:57     ` Nikhilesh Reddy
  2016-01-14 19:19       ` Linus Torvalds
  0 siblings, 1 reply; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-14 18:57 UTC (permalink / raw)
  To: Greg KH
  Cc: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, torvalds,
	linux-fsdevel, viro, Theodore Ts'o, jack,
	Antonio SJ Musumeci, sven.utcke, Nikolaus Rath,
	Richard Weinberger


>
> Your patch is line-wrapped and impossible to apply.
>
> And why cc: me?  This isn't my area of the kernel...
>
> greg k-h
>

Thanks for trying to apply the patch
and i am extremely  sorry for mangling the lines ( my config was broken)
I cc-ed you and a few other ppl to help with the review and the merge.
I am sorry if i bothered you.
But i would still be grateful for your comments


*I will send out a properly formatted patch shortly.*

To quote Nikolaus Rath
(the current maintainer of fuse userspace[libfuse]) :

[BEGIN QUOTE]
"Officially Miklos is still the maintainer, but I understand he has
almost no time for FUSE at the moment (neither kernel nor userspace). I
have taken over the userspace components, but as far as I know no one
stepped up for the kernel work.

The FUSE kernel components are maintained in the mainline Linux kernel
tree (and have been for quite some time), so nothing is being moved.

The way to get changes into the FUSE kernel module is the same procedure
as for any other kernel code: try to convince a maintainer to commit
your code. This doesn't need to be Miklos. If there's really no
maintainer available to review your patch, you can also try to send it
to Linus directly."
[END QUOTE]

I am hoping for some help from the maintainers to help get the patch 
merged. I have seen a lot of interest on the patch from the fuse 
community and would like to work with the kernel maintainers to help get 
the patch merged into the kernel.

*I will send out a properly formatted patch shortly.* sorry for the bad 
patch
-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-14 18:57     ` Nikhilesh Reddy
@ 2016-01-14 19:19       ` Linus Torvalds
  2016-01-15 16:31         ` Andy Lutomirski
  0 siblings, 1 reply; 19+ messages in thread
From: Linus Torvalds @ 2016-01-14 19:19 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Greg KH, Miklos Szeredi, fuse-devel, Linux Kernel Mailing List,
	Linux API, linux-fsdevel, Al Viro, Theodore Ts'o, Jan Kara,
	Antonio SJ Musumeci, sven.utcke, Nikolaus Rath,
	Richard Weinberger

On Thu, Jan 14, 2016 at 10:57 AM, Nikhilesh Reddy <reddyn@codeaurora.org> wrote:
>
> *I will send out a properly formatted patch shortly.*

So before you do that, can you explain in what cases fuse just wants
to pass through the IO?

Why aren't such cases just using unionfs or something?

In other words, I think that patch needs an explanation for the
*reason* for the new functionality.

               Linus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-14 15:52     ` Nikolaus Rath
@ 2016-01-15  9:29       ` Christoph Hellwig
  0 siblings, 0 replies; 19+ messages in thread
From: Christoph Hellwig @ 2016-01-15  9:29 UTC (permalink / raw)
  To: Christoph Hellwig, Nikhilesh Reddy, Miklos Szeredi, fuse-devel,
	linux-kernel, linux-api, gregkh, torvalds, linux-fsdevel, viro,
	Richard Weinberger, Theodore Ts'o, jack, Antonio SJ Musumeci,
	sven.utcke

On Thu, Jan 14, 2016 at 07:52:00AM -0800, Nikolaus Rath wrote:
> > Please write your stacked features as kernel drivers.
> 
> Could you explain in more detail? I think I don't understand either the
> why, the how, or what you mean with driver in this context (not sure
> which one it is). 
> 
> 
> (I'm not the original submitter but interested in this)


If someone is doing stacked pass through I/O, which basically is some
form of unioning or file hiding it should be using overlayfs or a new
kernel stackable file system.  That's not to say it could be prototyped
entirely in userspace, but it's not something we should add any bypasses
for.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-14 19:19       ` Linus Torvalds
@ 2016-01-15 16:31         ` Andy Lutomirski
       [not found]           ` <CAB3-ZyT+xcU-0BnNvq-+GsDe6W8=MCWwxguS-+KQ39gajmM5pw@mail.gmail.com>
  0 siblings, 1 reply; 19+ messages in thread
From: Andy Lutomirski @ 2016-01-15 16:31 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: fuse-devel, Al Viro, Greg KH, Nikhilesh Reddy, linux-fsdevel,
	Nikolaus Rath, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Antonio SJ Musumeci, Miklos Szeredi,
	Richard Weinberger, Linux Kernel Mailing List

On Jan 14, 2016 11:19 AM, "Linus Torvalds"
<torvalds@linux-foundation.org> wrote:
>
> On Thu, Jan 14, 2016 at 10:57 AM, Nikhilesh Reddy <reddyn@codeaurora.org> wrote:
> >
> > *I will send out a properly formatted patch shortly.*
>
> So before you do that, can you explain in what cases fuse just wants
> to pass through the IO?
>
> Why aren't such cases just using unionfs or something?
>
> In other words, I think that patch needs an explanation for the
> *reason* for the new functionality.

Also, clarifying the functionality would be nice.  What is the
"underlying" fs you speak of?  What f_cred gets used?  Who opens the
underlying file, whatever that is?

Is the intent overlay-style stacking or is it something else entirely?

--Andy

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
       [not found]           ` <CAB3-ZyT+xcU-0BnNvq-+GsDe6W8=MCWwxguS-+KQ39gajmM5pw@mail.gmail.com>
@ 2016-01-15 17:51             ` Nikolaus Rath
  2016-01-15 19:29               ` Nikhilesh Reddy
  0 siblings, 1 reply; 19+ messages in thread
From: Nikolaus Rath @ 2016-01-15 17:51 UTC (permalink / raw)
  To: Antonio SJ Musumeci
  Cc: Andy Lutomirski, Linus Torvalds, fuse-devel, Al Viro, Greg KH,
	Nikhilesh Reddy, linux-fsdevel, Linux API, Jan Kara,
	Theodore Ts'o, sven.utcke, Miklos Szeredi,
	Richard Weinberger, Linux Kernel Mailing List

On Jan 15 2016, Antonio SJ Musumeci <trapexit@spawn.link> wrote:
> The idea is that you want to be able to reason about open, create, etc. but
> don't care about the data transfer.
>
> I have N filesystems I wish to unionize. When I create a new file I want to
> pick the drive with the most free space (or some other algo). creat is
> called, succeeds, and now the application issuing this starts writing. The
> FUSE fs doesn't care about the writes. It just wanted to pick the drive
> this file should have been created on. Anything I'd do with the FD after
> that I'm happy to short circuit. I don't need to be asked what to do when
> fstat'ing this FD or anything which in FUSE hands over the 'fh'. It's just
> a file descriptor for me and I'd simply be calling the same function.
>
> Ideally I think one would want to be able to select which functions to
> short circuit and maybe even have it so that a short circuited function
> could propagate back through FUSE on error. But the read and write short
> circuiting is probably the biggest win given the overhead.


I think you should avoid using the term "stacked" completely (which
would also make Christoph happy). There have been several discussions in
the past about adding a "fd delegation" function to FUSE. Generally, the
idea is that the FUSE userspace code tells the FUSE kernel module to
internally "delegate" to writes and reads for a given file (or even a
range in that file) to a different file descriptor provided by
userspace.

I think that function would be useful, and not just for union file
systems. There are many FUSE file systems that end up writing the data
into some other file on the disk without doing any transformations on
the data itself. Especially with the range feature, they would all
benefit from the ability to delegate reads and writes.

However, Miklos has said in the past that the performance gain from this
is very small. You can get almost as good a result by splicing from one
fd to the other in userspace. In that case this function could actually
be implemented completely in libfuse.


Do you have any benchmark results that compare a splice-in-userspace
approach with your patch?


Best,
-Nikolaus

-- 
GPG encrypted emails preferred. Key id: 0xD113FCAC3C4E599F
Fingerprint: ED31 791B 2C5C 1613 AF38 8B8A D113 FCAC 3C4E 599F

             »Time flies like an arrow, fruit flies like a Banana.«

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 17:51             ` Nikolaus Rath
@ 2016-01-15 19:29               ` Nikhilesh Reddy
  2016-01-15 21:38                 ` Nikolaus Rath
                                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-15 19:29 UTC (permalink / raw)
  To: Nikolaus Rath
  Cc: Andy Lutomirski, Linus Torvalds, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On Fri 15 Jan 2016 09:51:50 AM PST, Nikolaus Rath wrote:
> On Jan 15 2016, Antonio SJ Musumeci <trapexit@spawn.link> wrote:
>> The idea is that you want to be able to reason about open, create, etc. but
>> don't care about the data transfer.
>>
>> I have N filesystems I wish to unionize. When I create a new file I want to
>> pick the drive with the most free space (or some other algo). creat is
>> called, succeeds, and now the application issuing this starts writing. The
>> FUSE fs doesn't care about the writes. It just wanted to pick the drive
>> this file should have been created on. Anything I'd do with the FD after
>> that I'm happy to short circuit. I don't need to be asked what to do when
>> fstat'ing this FD or anything which in FUSE hands over the 'fh'. It's just
>> a file descriptor for me and I'd simply be calling the same function.
>>
>> Ideally I think one would want to be able to select which functions to
>> short circuit and maybe even have it so that a short circuited function
>> could propagate back through FUSE on error. But the read and write short
>> circuiting is probably the biggest win given the overhead.
>
>
> I think you should avoid using the term "stacked" completely (which
> would also make Christoph happy). There have been several discussions in
> the past about adding a "fd delegation" function to FUSE. Generally, the
> idea is that the FUSE userspace code tells the FUSE kernel module to
> internally "delegate" to writes and reads for a given file (or even a
> range in that file) to a different file descriptor provided by
> userspace.
>
> I think that function would be useful, and not just for union file
> systems. There are many FUSE file systems that end up writing the data
> into some other file on the disk without doing any transformations on
> the data itself. Especially with the range feature, they would all
> benefit from the ability to delegate reads and writes.
>
> However, Miklos has said in the past that the performance gain from this
> is very small. You can get almost as good a result by splicing from one
> fd to the other in userspace. In that case this function could actually
> be implemented completely in libfuse.
>
>
> Do you have any benchmark results that compare a splice-in-userspace
> approach with your patch?
>
>
> Best,
> -Nikolaus
>


Hi

@Linus
Thanks for taking the time to reply to my email. It means a lot.

FUSE allows users to implement extensions to filesystems ..such as enforcing policy or permissions without having to modify the kernel or maintain the policy in the kernel.

One such example is what was quoted by Antonio above .. 
Another example is a fuse based filesystem that tries to enforce additional permissions on a FAT based mount point.

>From what i could google there are many FUSE based filesystems out there that do things during the open call but simply pass through the read/and write I/O calls to the local "lower" filesystem where they actually store the data.

>From what i understand ...unionfs or overlayfs and similar filesystems are primarily used to support a merged or unified view of directories and do not offer mechanisms to add policy or other checks /extensions to the I/O operations without modifying the kernel..

The main motivation is to make FUSE performance better in such usecases without loosing out on the ease of implementing and extending in the userspace.



@Nikolaus
Our local benchmarks on embedded devices (where power and cpu usage is critical) show that splice doesnt help as much .. when running multiple cpu's results in increased power usage

The below results are on a specific device model.

Where IOPS is number of 4K based read or writes that could be performed each second.

                                 regular         spliced         Stacked I/O
sequencial write (MiBPS)	56.55633333	100.34445       141.7096667
sequencial read (MiBPS)	        49.644	        60.43434        122.367

random write (IOPS)	        2554.333333	4053.4545       8572
random read (IOPS)	        977.3333333	1223.34         1432.666667

The above tests were performed using a file size of 1GB

Using stacked I/O showed the best performance (almost the same as the native EXT4 filesystem that is storing the real file)

Also we measured that there is a 5% saving of Power and the CPU timeslices used. ( Splice did not improve this at all compared to default fuse)

Random I/O i.e seeking to random parts of a file and reading ( usecases such as elf and *.so loading from fuse based filesystems also improved 


Similarly when using MMAPED I/O ( in an extended patch to this one.. still in progress) showed a significant improvement about a 400% improvement over default fuse.

Also we can called it FUSE_DELEGATED_IO if that helps :).
I chose to call is stacked i/o since we are technically stacking the fuse read/writes on the ext4/fat or other filesystems.

Please let me know if you have any questions.

@everyone
Thanks so much for your comments and the interest.
Also many of you have shown support for the patch in private emails.
I would be grateful if you could voice the same support on the public thread so that everyone knows that there is interest in this patch.


-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 19:29               ` Nikhilesh Reddy
@ 2016-01-15 21:38                 ` Nikolaus Rath
  2016-01-15 21:43                 ` Linus Torvalds
  2016-01-15 21:56                 ` [fuse-devel] " Hans Beckerus
  2 siblings, 0 replies; 19+ messages in thread
From: Nikolaus Rath @ 2016-01-15 21:38 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Andy Lutomirski, Linus Torvalds, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On Jan 15 2016, Nikhilesh Reddy <reddyn@codeaurora.org> wrote:
> Our local benchmarks on embedded devices (where power and cpu usage is
> critical) show that splice doesnt help as much .. when running
> multiple cpu's results in increased power usage
>
> The below results are on a specific device model.
>
> Where IOPS is number of 4K based read or writes that could be performed each second.
>
>                                  regular         spliced         Stacked I/O
> sequencial write (MiBPS)	56.55633333	100.34445       141.7096667
> sequencial read (MiBPS)	        49.644	        60.43434        122.367
>
> random write (IOPS)	        2554.333333	4053.4545       8572
> random read (IOPS)	        977.3333333	1223.34         1432.666667
>
> The above tests were performed using a file size of 1GB

That looks convincing to me.

> Also we can called it FUSE_DELEGATED_IO if that helps :).
> I chose to call is stacked i/o since we are technically stacking the
> fuse read/writes on the ext4/fat or other filesystems.

Yeah, but "stacked" has a pretty strong association with union/overlay
file systems (as you have seen), and the feature is not at all specific
to that usecase.

For example, many FUSE file systems store data in some remote server
over the network but cache it on a local disk. The access to the cached
data would benefit from the feature under discussion, but I have a hard
time thinking of such a FUSE file system being "stacked" on the file
system that happens to hold its cache.


Best,
-Nikolaus

-- 
GPG encrypted emails preferred. Key id: 0xD113FCAC3C4E599F
Fingerprint: ED31 791B 2C5C 1613 AF38 8B8A D113 FCAC 3C4E 599F

             »Time flies like an arrow, fruit flies like a Banana.«

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 19:29               ` Nikhilesh Reddy
  2016-01-15 21:38                 ` Nikolaus Rath
@ 2016-01-15 21:43                 ` Linus Torvalds
  2016-01-15 21:46                   ` Andy Lutomirski
  2016-01-15 21:56                 ` [fuse-devel] " Hans Beckerus
  2 siblings, 1 reply; 19+ messages in thread
From: Linus Torvalds @ 2016-01-15 21:43 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Nikolaus Rath, Andy Lutomirski, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On Fri, Jan 15, 2016 at 11:29 AM, Nikhilesh Reddy <reddyn@codeaurora.org> wrote:
>
> FUSE allows users to implement extensions to filesystems ..such as enforcing policy or permissions without having to modify the kernel or maintain the policy in the kernel.
>
> One such example is what was quoted by Antonio above ..
> Another example is a fuse based filesystem that tries to enforce additional permissions on a FAT based mount point.
>
> From what i could google there are many FUSE based filesystems out there that do things during the open call but simply pass through the read/and write I/O calls to the local "lower" filesystem where they actually store the data.

So I think these are valid use-cases, and I just think that they should

 (a) be documented in the commit message as explanations of why people
would do this/

 (b) not be called "stacked", because that tends to have some other
connotations to fs people.

I don't know what a better term would be, but you yourself used "pass
through". Maybe that (perhaps together with a clarification that it's
a per-file thing) might work fine.

Btw, why is mmap not passed through? That sounds fairly simple and
straightforward, I'm not seeing why it would be missing.

                 Linus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 21:43                 ` Linus Torvalds
@ 2016-01-15 21:46                   ` Andy Lutomirski
  2016-01-15 21:53                     ` Linus Torvalds
  0 siblings, 1 reply; 19+ messages in thread
From: Andy Lutomirski @ 2016-01-15 21:46 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Nikhilesh Reddy, Nikolaus Rath, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On Fri, Jan 15, 2016 at 1:43 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> On Fri, Jan 15, 2016 at 11:29 AM, Nikhilesh Reddy <reddyn@codeaurora.org> wrote:
>>
>> FUSE allows users to implement extensions to filesystems ..such as enforcing policy or permissions without having to modify the kernel or maintain the policy in the kernel.
>>
>> One such example is what was quoted by Antonio above ..
>> Another example is a fuse based filesystem that tries to enforce additional permissions on a FAT based mount point.
>>
>> From what i could google there are many FUSE based filesystems out there that do things during the open call but simply pass through the read/and write I/O calls to the local "lower" filesystem where they actually store the data.
>
> So I think these are valid use-cases, and I just think that they should
>
>  (a) be documented in the commit message as explanations of why people
> would do this/
>
>  (b) not be called "stacked", because that tends to have some other
> connotations to fs people.
>
> I don't know what a better term would be, but you yourself used "pass
> through". Maybe that (perhaps together with a clarification that it's
> a per-file thing) might work fine.
>
> Btw, why is mmap not passed through? That sounds fairly simple and
> straightforward, I'm not seeing why it would be missing.
>

If mmap sets vm_file to the underlying thing, wouldn't CRIU and
anything else that uses map_files get confused?  Or did you have
something else in mind?

--Andy

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 21:46                   ` Andy Lutomirski
@ 2016-01-15 21:53                     ` Linus Torvalds
  2016-01-19  2:57                       ` Nikhilesh Reddy
  0 siblings, 1 reply; 19+ messages in thread
From: Linus Torvalds @ 2016-01-15 21:53 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Nikhilesh Reddy, Nikolaus Rath, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On Fri, Jan 15, 2016 at 1:46 PM, Andy Lutomirski <luto@amacapital.net> wrote:

> If mmap sets vm_file to the underlying thing, wouldn't CRIU and
> anything else that uses map_files get confused?  Or did you have
> something else in mind?

Why would they care?

Also, I don't think you actually need to change vm_file - we have this
whole notion of "inode->i_data" vs "inode->i_mapping".

So I think you could set the "i_mapping" of the fuse inode to be the
i_mapping of the passed-through-to inode, and it should just work.

And no, I didn't look into this very deeply, and maybe there is some
annoying detail that would make that not work well. But that's part of
the whole point of the i_mapping indirection: so that you can share
the page cache when you have two separate anchor points. I think coda
uses it for the local caching, and block devices use it to not have
mapping aliases between different inodex that all are the same block
device.

                 Linus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [fuse-devel] [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 19:29               ` Nikhilesh Reddy
  2016-01-15 21:38                 ` Nikolaus Rath
  2016-01-15 21:43                 ` Linus Torvalds
@ 2016-01-15 21:56                 ` Hans Beckerus
  2 siblings, 0 replies; 19+ messages in thread
From: Hans Beckerus @ 2016-01-15 21:56 UTC (permalink / raw)
  To: Nikhilesh Reddy, Nikolaus Rath
  Cc: Jan Kara, Richard Weinberger, Miklos Szeredi, fuse-devel,
	Greg KH, Linux Kernel Mailing List, Andy Lutomirski, sven.utcke,
	Al Viro, Linux API, linux-fsdevel, Theodore Ts'o,
	Linus Torvalds

On 2016-01-15 8:29, Nikhilesh Reddy wrote:
> On Fri 15 Jan 2016 09:51:50 AM PST, Nikolaus Rath wrote:
>> On Jan 15 2016, Antonio SJ Musumeci <trapexit@spawn.link> wrote:
>>> The idea is that you want to be able to reason about open, create, etc. but
>>> don't care about the data transfer.
>>>
>>> I have N filesystems I wish to unionize. When I create a new file I want to
>>> pick the drive with the most free space (or some other algo). creat is
>>> called, succeeds, and now the application issuing this starts writing. The
>>> FUSE fs doesn't care about the writes. It just wanted to pick the drive
>>> this file should have been created on. Anything I'd do with the FD after
>>> that I'm happy to short circuit. I don't need to be asked what to do when
>>> fstat'ing this FD or anything which in FUSE hands over the 'fh'. It's just
>>> a file descriptor for me and I'd simply be calling the same function.
>>>
>>> Ideally I think one would want to be able to select which functions to
>>> short circuit and maybe even have it so that a short circuited function
>>> could propagate back through FUSE on error. But the read and write short
>>> circuiting is probably the biggest win given the overhead.
>>
>> I think you should avoid using the term "stacked" completely (which
>> would also make Christoph happy). There have been several discussions in
>> the past about adding a "fd delegation" function to FUSE. Generally, the
>> idea is that the FUSE userspace code tells the FUSE kernel module to
>> internally "delegate" to writes and reads for a given file (or even a
>> range in that file) to a different file descriptor provided by
>> userspace.
>>
>> I think that function would be useful, and not just for union file
>> systems. There are many FUSE file systems that end up writing the data
>> into some other file on the disk without doing any transformations on
>> the data itself. Especially with the range feature, they would all
>> benefit from the ability to delegate reads and writes.
I agree with Nikolaus here. I do believe there might be use-cases that 
could benefit from this.
I have a typical example were a FUSE fs wish to handle reads but really 
does not care about the writes other than
it should transparently write to the underlying fs. Simply getting a 
move of a file from the underlying fs to the
FUSE mount point if located on e.g. the same physical partition would 
result in a more or less instant operation, right?
But this also requires that the operations are selectable. A user should 
be able to choose which operation to bypass.
I understand though that this will need adaptations to libfuse as well.
Another question here is if an inotify write-type watch on the FUSE 
mount point will be affected by this or not?

>> However, Miklos has said in the past that the performance gain from this
>> is very small. You can get almost as good a result by splicing from one
>> fd to the other in userspace. In that case this function could actually
>> be implemented completely in libfuse.
>>
>>
>> Do you have any benchmark results that compare a splice-in-userspace
>> approach with your patch?
>>
>>
>> Best,
>> -Nikolaus
>>
>
> Hi
>
> @Linus
> Thanks for taking the time to reply to my email. It means a lot.
>
> FUSE allows users to implement extensions to filesystems ..such as enforcing policy or permissions without having to modify the kernel or maintain the policy in the kernel.
>
> One such example is what was quoted by Antonio above ..
> Another example is a fuse based filesystem that tries to enforce additional permissions on a FAT based mount point.
>
> >From what i could google there are many FUSE based filesystems out there that do things during the open call but simply pass through the read/and write I/O calls to the local "lower" filesystem where they actually store the data.
>
> >From what i understand ...unionfs or overlayfs and similar filesystems are primarily used to support a merged or unified view of directories and do not offer mechanisms to add policy or other checks /extensions to the I/O operations without modifying the kernel..
>
> The main motivation is to make FUSE performance better in such usecases without loosing out on the ease of implementing and extending in the userspace.
>
>
>
> @Nikolaus
> Our local benchmarks on embedded devices (where power and cpu usage is critical) show that splice doesnt help as much .. when running multiple cpu's results in increased power usage
>
> The below results are on a specific device model.
>
> Where IOPS is number of 4K based read or writes that could be performed each second.
>
>                                   regular         spliced         Stacked I/O
> sequencial write (MiBPS)	56.55633333	100.34445       141.7096667
> sequencial read (MiBPS)	        49.644	        60.43434        122.367
>
> random write (IOPS)	        2554.333333	4053.4545       8572
> random read (IOPS)	        977.3333333	1223.34         1432.666667
>
> The above tests were performed using a file size of 1GB
>
> Using stacked I/O showed the best performance (almost the same as the native EXT4 filesystem that is storing the real file)
>
> Also we measured that there is a 5% saving of Power and the CPU timeslices used. ( Splice did not improve this at all compared to default fuse)
>
> Random I/O i.e seeking to random parts of a file and reading ( usecases such as elf and *.so loading from fuse based filesystems also improved
>
>
> Similarly when using MMAPED I/O ( in an extended patch to this one.. still in progress) showed a significant improvement about a 400% improvement over default fuse.
>
> Also we can called it FUSE_DELEGATED_IO if that helps :).
> I chose to call is stacked i/o since we are technically stacking the fuse read/writes on the ext4/fat or other filesystems.
>
> Please let me know if you have any questions.
>
> @everyone
> Thanks so much for your comments and the interest.
> Also many of you have shown support for the patch in private emails.
> I would be grateful if you could voice the same support on the public thread so that everyone knows that there is interest in this patch.
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-15 21:53                     ` Linus Torvalds
@ 2016-01-19  2:57                       ` Nikhilesh Reddy
  2016-01-20 23:24                         ` Nikhilesh Reddy
  0 siblings, 1 reply; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-19  2:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Nikolaus Rath, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On 01/15/2016 01:53 PM, Linus Torvalds wrote:
> On Fri, Jan 15, 2016 at 1:46 PM, Andy Lutomirski <luto@amacapital.net> wrote:
>
>> If mmap sets vm_file to the underlying thing, wouldn't CRIU and
>> anything else that uses map_files get confused?  Or did you have
>> something else in mind?
>
> Why would they care?
>
> Also, I don't think you actually need to change vm_file - we have this
> whole notion of "inode->i_data" vs "inode->i_mapping".
>
> So I think you could set the "i_mapping" of the fuse inode to be the
> i_mapping of the passed-through-to inode, and it should just work.
>
> And no, I didn't look into this very deeply, and maybe there is some
> annoying detail that would make that not work well. But that's part of
> the whole point of the i_mapping indirection: so that you can share
> the page cache when you have two separate anchor points. I think coda
> uses it for the local caching, and block devices use it to not have
> mapping aliases between different inodex that all are the same block
> device.
>
>                   Linus
>

Hi

Thanks for your support.

I am looking  into adding the mmap by change the i_mmaping but seems to 
be getting stuck for some stress tests.

Once i get that debugged and working i will definitely send out a new 
patch with that functionality.

( Sending it as a second patch will help make it easier to go through 
the legal redtape and procedures i am forced to follow).

For now i am going to update the current patch and call it "passthrough" 
as you suggested and also update the commit message giving  a clearer 
explanation of the motivation.

Once i get it tested I can send it out for consideration and review

Please do let me know if you have any questions or concerns on this and 
i will try my best to address them all.


-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-13 23:53 ` [PATCH] fuse: Add support for fuse stacked I/O Nikhilesh Reddy
  2016-01-14  4:57   ` Greg KH
  2016-01-14  8:25   ` Christoph Hellwig
@ 2016-01-19  3:07   ` Jann Horn
  2016-01-20 23:07     ` Nikhilesh Reddy
  2 siblings, 1 reply; 19+ messages in thread
From: Jann Horn @ 2016-01-19  3:07 UTC (permalink / raw)
  To: Nikhilesh Reddy
  Cc: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, gregkh,
	torvalds, linux-fsdevel, viro, Richard Weinberger,
	Theodore Ts'o, jack, Antonio SJ Musumeci, sven.utcke,
	Nikolaus Rath

2016-01-14 0:53 GMT+01:00 Nikhilesh Reddy <reddyn@codeaurora.org>:
> Add support for filesystem stacked read/write of files
> when enabled through a userspace init option of FUSE_STACKED_IO.
>
> When FUSE_STACKED_IO is enabled all the reads and writes
> to the fuse mount point go directly to the native filesystem
> rather than through the fuse daemon. All requests that aren't
> read/write still go thought the userspace code.

Maybe I missed it, but how does this guard against kernel stack
overflow and how does it interact with the "sb->s_stack_depth >
FILESYSTEM_MAX_STACK_DEPTH" stacking limit that overlayfs and ecryptfs
use?

As far as I can tell from a quick glance, someone could just stack
lots of FUSE files on top of each other and cause kernel stack
overflow that way, and that's nasty.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-19  3:07   ` Jann Horn
@ 2016-01-20 23:07     ` Nikhilesh Reddy
  0 siblings, 0 replies; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-20 23:07 UTC (permalink / raw)
  To: Jann Horn
  Cc: Miklos Szeredi, fuse-devel, linux-kernel, linux-api, gregkh,
	torvalds, linux-fsdevel, viro, Richard Weinberger,
	Theodore Ts'o, jack, Antonio SJ Musumeci, sven.utcke,
	Nikolaus Rath

On 01/18/2016 07:07 PM, Jann Horn wrote:
> 2016-01-14 0:53 GMT+01:00 Nikhilesh Reddy <reddyn@codeaurora.org>:
>> Add support for filesystem stacked read/write of files
>> when enabled through a userspace init option of FUSE_STACKED_IO.
>>
>> When FUSE_STACKED_IO is enabled all the reads and writes
>> to the fuse mount point go directly to the native filesystem
>> rather than through the fuse daemon. All requests that aren't
>> read/write still go thought the userspace code.
>
> Maybe I missed it, but how does this guard against kernel stack
> overflow and how does it interact with the "sb->s_stack_depth >
> FILESYSTEM_MAX_STACK_DEPTH" stacking limit that overlayfs and ecryptfs
> use?
>
> As far as I can tell from a quick glance, someone could just stack
> lots of FUSE files on top of each other and cause kernel stack
> overflow that way, and that's nasty.
>
Hi
Thanks so much for your comment and for catching this.
I have fixed the code to prevent further stacking and will send it out 
in the updated version of the patch ( now called fuse passthrough ).

-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] fuse: Add support for fuse stacked I/O
  2016-01-19  2:57                       ` Nikhilesh Reddy
@ 2016-01-20 23:24                         ` Nikhilesh Reddy
  0 siblings, 0 replies; 19+ messages in thread
From: Nikhilesh Reddy @ 2016-01-20 23:24 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Andy Lutomirski, Nikolaus Rath, fuse-devel, Al Viro, Greg KH,
	linux-fsdevel, Linux API, Jan Kara, Theodore Ts'o,
	sven.utcke, Miklos Szeredi, Richard Weinberger,
	Linux Kernel Mailing List, Antonio SJ Musumeci

On 01/18/2016 06:57 PM, Nikhilesh Reddy wrote:
> On 01/15/2016 01:53 PM, Linus Torvalds wrote:
>> On Fri, Jan 15, 2016 at 1:46 PM, Andy Lutomirski <luto@amacapital.net>
>> wrote:
>>
>>> If mmap sets vm_file to the underlying thing, wouldn't CRIU and
>>> anything else that uses map_files get confused?  Or did you have
>>> something else in mind?
>>
>> Why would they care?
>>
>> Also, I don't think you actually need to change vm_file - we have this
>> whole notion of "inode->i_data" vs "inode->i_mapping".
>>
>> So I think you could set the "i_mapping" of the fuse inode to be the
>> i_mapping of the passed-through-to inode, and it should just work.
>>
>> And no, I didn't look into this very deeply, and maybe there is some
>> annoying detail that would make that not work well. But that's part of
>> the whole point of the i_mapping indirection: so that you can share
>> the page cache when you have two separate anchor points. I think coda
>> uses it for the local caching, and block devices use it to not have
>> mapping aliases between different inodex that all are the same block
>> device.
>>
>>                   Linus
>>
>
> Hi
>
> Thanks for your support.
>
> I am looking  into adding the mmap by change the i_mmaping but seems to
> be getting stuck for some stress tests.
>
> Once i get that debugged and working i will definitely send out a new
> patch with that functionality.
>
> ( Sending it as a second patch will help make it easier to go through
> the legal redtape and procedures i am forced to follow).
>
> For now i am going to update the current patch and call it "passthrough"
> as you suggested and also update the commit message giving  a clearer
> explanation of the motivation.
>
> Once i get it tested I can send it out for consideration and review
>
> Please do let me know if you have any questions or concerns on this and
> i will try my best to address them all.
>
>
I have just sent out the latest version of the patch in the email with 
the subject
[PATCH v3] fuse: Add support for passthrough read/write

Please do let me know if you have any questions or concerns on this and
i will try my best to address them all.

-- 
Thanks
Nikhilesh Reddy

Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2016-01-20 23:24 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <565394BE.4040506@codeaurora.org>
2016-01-13 23:53 ` [PATCH] fuse: Add support for fuse stacked I/O Nikhilesh Reddy
2016-01-14  4:57   ` Greg KH
2016-01-14 18:57     ` Nikhilesh Reddy
2016-01-14 19:19       ` Linus Torvalds
2016-01-15 16:31         ` Andy Lutomirski
     [not found]           ` <CAB3-ZyT+xcU-0BnNvq-+GsDe6W8=MCWwxguS-+KQ39gajmM5pw@mail.gmail.com>
2016-01-15 17:51             ` Nikolaus Rath
2016-01-15 19:29               ` Nikhilesh Reddy
2016-01-15 21:38                 ` Nikolaus Rath
2016-01-15 21:43                 ` Linus Torvalds
2016-01-15 21:46                   ` Andy Lutomirski
2016-01-15 21:53                     ` Linus Torvalds
2016-01-19  2:57                       ` Nikhilesh Reddy
2016-01-20 23:24                         ` Nikhilesh Reddy
2016-01-15 21:56                 ` [fuse-devel] " Hans Beckerus
2016-01-14  8:25   ` Christoph Hellwig
2016-01-14 15:52     ` Nikolaus Rath
2016-01-15  9:29       ` Christoph Hellwig
2016-01-19  3:07   ` Jann Horn
2016-01-20 23:07     ` Nikhilesh Reddy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).