linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch 0/3] fuse: implement missing functionality on "direct_io" files
@ 2009-03-30 20:00 Miklos Szeredi
  2009-03-30 20:02 ` [patch 1/3] fuse: allow kernel to access " Miklos Szeredi
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Miklos Szeredi @ 2009-03-30 20:00 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: jens.axboe, linux-kernel

Fuse allows filesystems to bypass the page cache using the "direct_io"
flag.  This is not the same as O_DIRECT: it is dictated by the
filesystem, not by the application.

Some functionality didn't work in this mode, however.  This short
patchset improves that:

 1. fuse: allow kernel to access "direct_io" files
 2. fuse: allow private mappings of "direct_io" files
 3. splice: implement default splice_read method

The third patch is not fuse specific, it implements ->splice_read()
for any filesystem which doesn't provide one.

Comments are welcome.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [patch 1/3] fuse: allow kernel to access "direct_io" files
  2009-03-30 20:00 [patch 0/3] fuse: implement missing functionality on "direct_io" files Miklos Szeredi
@ 2009-03-30 20:02 ` Miklos Szeredi
  2009-03-30 20:03 ` [patch 2/3] fuse: allow private mappings of " Miklos Szeredi
  2009-03-30 20:04 ` [patch 3/3] splice: implement default splice_read method Miklos Szeredi
  2 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2009-03-30 20:02 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel

From: Miklos Szeredi <mszeredi@suse.cz>

Allow the kernel to read from and write to "direct_io" files.  This is
necessary for nfs export and execute support.

The implementation is simple: if an access from the kernel is
detected, don't perform get_user_pages(), just use the kernel address
provided by the requester to copy from/to the userspace filesystem.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c  |    1 +
 fs/fuse/file.c |   53 ++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 41 insertions(+), 13 deletions(-)

Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c	2009-03-25 10:30:52.000000000 +0100
+++ linux-2.6/fs/fuse/file.c	2009-03-25 10:35:22.000000000 +0100
@@ -386,7 +386,6 @@ void fuse_read_fill(struct fuse_req *req
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(struct fuse_read_in);
 	req->in.args[0].value = inarg;
-	req->out.argpages = 1;
 	req->out.argvar = 1;
 	req->out.numargs = 1;
 	req->out.args[0].size = count;
@@ -453,6 +452,7 @@ static int fuse_readpage(struct file *fi
 	attr_ver = fuse_get_attr_version(fc);
 
 	req->out.page_zeroing = 1;
+	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
 	num_read = fuse_send_read(req, file, inode, pos, count, NULL);
@@ -510,6 +510,8 @@ static void fuse_send_readpages(struct f
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t pos = page_offset(req->pages[0]);
 	size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+
+	req->out.argpages = 1;
 	req->out.page_zeroing = 1;
 	fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
 	req->misc.read.attr_ver = fuse_get_attr_version(fc);
@@ -621,7 +623,6 @@ static void fuse_write_fill(struct fuse_
 	inarg->flags = file ? file->f_flags : 0;
 	req->in.h.opcode = FUSE_WRITE;
 	req->in.h.nodeid = get_node_id(inode);
-	req->in.argpages = 1;
 	req->in.numargs = 2;
 	if (fc->minor < 9)
 		req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
@@ -695,6 +696,7 @@ static int fuse_buffered_write(struct fi
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	req->in.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
 	req->page_offset = offset;
@@ -771,6 +773,7 @@ static ssize_t fuse_fill_write_pages(str
 	size_t count = 0;
 	int err;
 
+	req->in.argpages = 1;
 	req->page_offset = offset;
 
 	do {
@@ -935,21 +938,28 @@ static void fuse_release_user_pages(stru
 }
 
 static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
-			       unsigned nbytes, int write)
+			       unsigned *nbytesp, int write)
 {
+	unsigned nbytes = *nbytesp;
 	unsigned long user_addr = (unsigned long) buf;
 	unsigned offset = user_addr & ~PAGE_MASK;
 	int npages;
 
-	/* This doesn't work with nfsd */
-	if (!current->mm)
-		return -EPERM;
+	/* Special case for kernel I/O: can copy directly into the buffer */
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		if (write)
+			req->in.args[1].value = (void *) user_addr;
+		else
+			req->out.args[0].value = (void *) user_addr;
+
+		return 0;
+	}
 
 	nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
 	npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
 	down_read(&current->mm->mmap_sem);
-	npages = get_user_pages(current, current->mm, user_addr, npages, write,
+	npages = get_user_pages(current, current->mm, user_addr, npages, !write,
 				0, req->pages, NULL);
 	up_read(&current->mm->mmap_sem);
 	if (npages < 0)
@@ -957,6 +967,15 @@ static int fuse_get_user_pages(struct fu
 
 	req->num_pages = npages;
 	req->page_offset = offset;
+
+	if (write)
+		req->in.argpages = 1;
+	else
+		req->out.argpages = 1;
+
+	nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+	*nbytesp = min(*nbytesp, nbytes);
+
 	return 0;
 }
 
@@ -979,15 +998,13 @@ static ssize_t fuse_direct_io(struct fil
 
 	while (count) {
 		size_t nres;
-		size_t nbytes_limit = min(count, nmax);
-		size_t nbytes;
-		int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
+		size_t nbytes = min(count, nmax);
+		int err = fuse_get_user_pages(req, buf, &nbytes, write);
 		if (err) {
 			res = err;
 			break;
 		}
-		nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
-		nbytes = min(nbytes_limit, nbytes);
+
 		if (write)
 			nres = fuse_send_write(req, file, inode, pos, nbytes,
 					       current->files);
@@ -1163,6 +1180,7 @@ static int fuse_writepage_locked(struct
 	fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
 
 	copy_highpage(tmp_page, page);
+	req->in.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = tmp_page;
 	req->page_offset = 0;
Index: linux-2.6/fs/fuse/dir.c
===================================================================
--- linux-2.6.orig/fs/fuse/dir.c	2009-03-25 10:31:06.000000000 +0100
+++ linux-2.6/fs/fuse/dir.c	2009-03-25 10:31:10.000000000 +0100
@@ -1032,6 +1032,7 @@ static int fuse_readdir(struct file *fil
 		fuse_put_request(fc, req);
 		return -ENOMEM;
 	}
+	req->out.argpages = 1;
 	req->num_pages = 1;
 	req->pages[0] = page;
 	fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [patch 2/3] fuse: allow private mappings of "direct_io" files
  2009-03-30 20:00 [patch 0/3] fuse: implement missing functionality on "direct_io" files Miklos Szeredi
  2009-03-30 20:02 ` [patch 1/3] fuse: allow kernel to access " Miklos Szeredi
@ 2009-03-30 20:03 ` Miklos Szeredi
  2009-03-30 20:04 ` [patch 3/3] splice: implement default splice_read method Miklos Szeredi
  2 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2009-03-30 20:03 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel

From: Miklos Szeredi <mszeredi@suse.cz>

Allow MAP_PRIVATE mmaps of "direct_io" files.  This is necessary for
execute support.

MAP_SHARED mappings require some sort of coherency between the
underlying file and the mapping.  With "direct_io" it is difficult to
provide this, so for the moment just disallow shared (read-write and
read-only) mappings altogether.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c |   12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c	2009-03-30 18:47:55.000000000 +0200
+++ linux-2.6/fs/fuse/file.c	2009-03-30 19:07:11.000000000 +0200
@@ -1291,6 +1291,15 @@ static int fuse_file_mmap(struct file *f
 	return 0;
 }
 
+static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	/* Can't provide the coherency needed for MAP_SHARED */
+	if (vma->vm_flags & VM_MAYSHARE)
+		return -ENODEV;
+
+	return generic_file_mmap(file, vma);
+}
+
 static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
 				  struct file_lock *fl)
 {
@@ -1925,6 +1934,7 @@ static const struct file_operations fuse
 	.llseek		= fuse_file_llseek,
 	.read		= fuse_direct_read,
 	.write		= fuse_direct_write,
+	.mmap		= fuse_direct_mmap,
 	.open		= fuse_open,
 	.flush		= fuse_flush,
 	.release	= fuse_release,
@@ -1934,7 +1944,7 @@ static const struct file_operations fuse
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
 	.poll		= fuse_file_poll,
-	/* no mmap and splice_read */
+	/* no splice_read */
 };
 
 static const struct address_space_operations fuse_file_aops  = {

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [patch 3/3] splice: implement default splice_read method
  2009-03-30 20:00 [patch 0/3] fuse: implement missing functionality on "direct_io" files Miklos Szeredi
  2009-03-30 20:02 ` [patch 1/3] fuse: allow kernel to access " Miklos Szeredi
  2009-03-30 20:03 ` [patch 2/3] fuse: allow private mappings of " Miklos Szeredi
@ 2009-03-30 20:04 ` Miklos Szeredi
  2009-03-30 20:59   ` Brad Boyer
  2 siblings, 1 reply; 6+ messages in thread
From: Miklos Szeredi @ 2009-03-30 20:04 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: jens.axboe, linux-kernel

From: Miklos Szeredi <mszeredi@suse.cz>

If f_op->splice_read() is not implemented fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems.  This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/pipe.c                 |   14 +++++
 fs/splice.c               |  120 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fs.h        |    2 
 include/linux/pipe_fs_i.h |    2 
 4 files changed, 133 insertions(+), 5 deletions(-)

Index: linux-2.6/fs/pipe.c
===================================================================
--- linux-2.6.orig/fs/pipe.c	2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/fs/pipe.c	2009-03-30 21:22:06.000000000 +0200
@@ -268,6 +268,20 @@ int generic_pipe_buf_confirm(struct pipe
 	return 0;
 }
 
+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ *
+ * Description:
+ *	This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+}
+
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c	2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/fs/splice.c	2009-03-30 21:22:06.000000000 +0200
@@ -509,9 +509,116 @@ ssize_t generic_file_splice_read(struct
 
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_splice_read);
 
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = generic_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	unsigned int nr_pages;
+	unsigned int nr_freed;
+	size_t offset;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct iovec vec[PIPE_BUFFERS];
+	pgoff_t index;
+	ssize_t res;
+	size_t this_len;
+	int error;
+	int i;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &default_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+		struct page *page;
+
+		page = alloc_page(GFP_HIGHUSER);
+		error = -ENOMEM;
+		if (!page)
+			goto err;
+
+		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		vec[i].iov_base = (void __user *) kmap(page);
+		vec[i].iov_len = this_len;
+		pages[i] = page;
+		spd.nr_pages++;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+	if (res < 0)
+		goto err;
+
+	error = 0;
+	if (!res)
+		goto err;
+
+	nr_freed = 0;
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		this_len = min_t(size_t, vec[i].iov_len, res);
+		partial[i].offset = 0;
+		partial[i].len = this_len;
+		if (!this_len) {
+			__free_page(pages[i]);
+			pages[i] = NULL;
+			nr_freed++;
+		}
+		res -= this_len;
+	}
+	spd.nr_pages -= nr_freed;
+
+	res = splice_to_pipe(pipe, &spd);
+	if (res > 0)
+		*ppos += res;
+
+	return res;
+
+err:
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		__free_page(pages[i]);
+	}
+	return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -916,11 +1023,10 @@ static long do_splice_to(struct file *in
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!in->f_op || !in->f_op->splice_read))
-		return -EINVAL;
-
 	if (unlikely(!(in->f_mode & FMODE_READ)))
 		return -EBADF;
 
@@ -928,7 +1034,11 @@ static long do_splice_to(struct file *in
 	if (unlikely(ret < 0))
 		return ret;
 
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	splice_read = in->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
+
+	return splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
Index: linux-2.6/include/linux/pipe_fs_i.h
===================================================================
--- linux-2.6.orig/include/linux/pipe_fs_i.h	2009-03-30 21:11:58.000000000 +0200
+++ linux-2.6/include/linux/pipe_fs_i.h	2009-03-30 21:22:06.000000000 +0200
@@ -147,5 +147,7 @@ void generic_pipe_buf_unmap(struct pipe_
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+
 
 #endif
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2009-03-30 21:13:11.000000000 +0200
+++ linux-2.6/include/linux/fs.h	2009-03-30 21:22:06.000000000 +0200
@@ -2126,6 +2126,8 @@ extern int generic_segment_checks(const
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch 3/3] splice: implement default splice_read method
  2009-03-30 20:04 ` [patch 3/3] splice: implement default splice_read method Miklos Szeredi
@ 2009-03-30 20:59   ` Brad Boyer
  2009-03-31 10:07     ` Miklos Szeredi
  0 siblings, 1 reply; 6+ messages in thread
From: Brad Boyer @ 2009-03-30 20:59 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, jens.axboe, linux-kernel

On Mon, Mar 30, 2009 at 10:04:28PM +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi <mszeredi@suse.cz>
> 
> If f_op->splice_read() is not implemented fall back to a plain read.
> Use vfs_readv() to read into previously allocated pages.
> 
> This will allow splice and functions using splice, such as the loop
> device, to work on all filesystems.  This includes "direct_io" files
> in fuse which bypass the page cache.

Based on your description, I would have expected this patch to make
the loop driver work seamlessly. Unless I'm misreading something, I
think the loop driver will still error out if the fs driver in question
doesn't explicitly set splice_read.

From drivers/block/loop.c:
724         /* new backing store needs to support loop (eg splice_read) */
725         if (!inode->i_fop->splice_read)
726                 goto out_putf;
727 

Did you accidentally leave out some changes? Please let me know if I
am missing something obvious.

	Brad Boyer
	flar@allandria.com


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [patch 3/3] splice: implement default splice_read method
  2009-03-30 20:59   ` Brad Boyer
@ 2009-03-31 10:07     ` Miklos Szeredi
  0 siblings, 0 replies; 6+ messages in thread
From: Miklos Szeredi @ 2009-03-31 10:07 UTC (permalink / raw)
  To: flar; +Cc: miklos, linux-fsdevel, jens.axboe, linux-kernel

On Mon, 30 Mar 2009, Brad Boyer wrote:
> Based on your description, I would have expected this patch to make
> the loop driver work seamlessly. Unless I'm misreading something, I
> think the loop driver will still error out if the fs driver in question
> doesn't explicitly set splice_read.

You're right.  Here's an updated patch.

Thanks,
Miklos

---
From: Miklos Szeredi <mszeredi@suse.cz>

If f_op->splice_read() is not implemented fall back to a plain read.
Use vfs_readv() to read into previously allocated pages.

This will allow splice and functions using splice, such as the loop
device, to work on all filesystems.  This includes "direct_io" files
in fuse which bypass the page cache.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 drivers/block/loop.c      |   11 ----
 fs/coda/file.c            |    9 ++-
 fs/fuse/file.c            |    1 
 fs/pipe.c                 |   14 +++++
 fs/read_write.c           |    7 --
 fs/splice.c               |  120 ++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fs.h        |    2 
 include/linux/pipe_fs_i.h |    1 
 8 files changed, 140 insertions(+), 25 deletions(-)

Index: linux-2.6/fs/pipe.c
===================================================================
--- linux-2.6.orig/fs/pipe.c	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/pipe.c	2009-03-31 11:44:12.000000000 +0200
@@ -268,6 +268,20 @@ int generic_pipe_buf_confirm(struct pipe
 	return 0;
 }
 
+/**
+ * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ *
+ * Description:
+ *	This function releases a reference to @buf.
+ */
+void generic_pipe_buf_release(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	page_cache_release(buf->page);
+}
+
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
 	.map = generic_pipe_buf_map,
Index: linux-2.6/fs/splice.c
===================================================================
--- linux-2.6.orig/fs/splice.c	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/splice.c	2009-03-31 11:44:12.000000000 +0200
@@ -509,9 +509,116 @@ ssize_t generic_file_splice_read(struct
 
 	return ret;
 }
-
 EXPORT_SYMBOL(generic_file_splice_read);
 
+static const struct pipe_buf_operations default_pipe_buf_ops = {
+	.can_merge = 0,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.confirm = generic_pipe_buf_confirm,
+	.release = generic_pipe_buf_release,
+	.steal = generic_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
+static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;
+	ssize_t res;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+	set_fs(old_fs);
+
+	return res;
+}
+
+ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	unsigned int nr_pages;
+	unsigned int nr_freed;
+	size_t offset;
+	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_BUFFERS];
+	struct iovec vec[PIPE_BUFFERS];
+	pgoff_t index;
+	ssize_t res;
+	size_t this_len;
+	int error;
+	int i;
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &default_pipe_buf_ops,
+		.spd_release = spd_release_page,
+	};
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+		struct page *page;
+
+		page = alloc_page(GFP_HIGHUSER);
+		error = -ENOMEM;
+		if (!page)
+			goto err;
+
+		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+		vec[i].iov_base = (void __user *) kmap(page);
+		vec[i].iov_len = this_len;
+		pages[i] = page;
+		spd.nr_pages++;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, spd.nr_pages, *ppos);
+	if (res < 0)
+		goto err;
+
+	error = 0;
+	if (!res)
+		goto err;
+
+	nr_freed = 0;
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		this_len = min_t(size_t, vec[i].iov_len, res);
+		partial[i].offset = 0;
+		partial[i].len = this_len;
+		if (!this_len) {
+			__free_page(pages[i]);
+			pages[i] = NULL;
+			nr_freed++;
+		}
+		res -= this_len;
+	}
+	spd.nr_pages -= nr_freed;
+
+	res = splice_to_pipe(pipe, &spd);
+	if (res > 0)
+		*ppos += res;
+
+	return res;
+
+err:
+	for (i = 0; i < spd.nr_pages; i++) {
+		kunmap(pages[i]);
+		__free_page(pages[i]);
+	}
+	return error;
+}
+EXPORT_SYMBOL(default_file_splice_read);
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -916,11 +1023,10 @@ static long do_splice_to(struct file *in
 			 struct pipe_inode_info *pipe, size_t len,
 			 unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	int ret;
 
-	if (unlikely(!in->f_op || !in->f_op->splice_read))
-		return -EINVAL;
-
 	if (unlikely(!(in->f_mode & FMODE_READ)))
 		return -EBADF;
 
@@ -928,7 +1034,11 @@ static long do_splice_to(struct file *in
 	if (unlikely(ret < 0))
 		return ret;
 
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	splice_read = in->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
+
+	return splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
Index: linux-2.6/include/linux/pipe_fs_i.h
===================================================================
--- linux-2.6.orig/include/linux/pipe_fs_i.h	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/include/linux/pipe_fs_i.h	2009-03-31 12:04:19.000000000 +0200
@@ -147,5 +147,6 @@ void generic_pipe_buf_unmap(struct pipe_
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
 #endif
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/include/linux/fs.h	2009-03-31 11:44:12.000000000 +0200
@@ -2126,6 +2126,8 @@ extern int generic_segment_checks(const
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
+extern ssize_t default_file_splice_read(struct file *, loff_t *,
+		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *,
Index: linux-2.6/drivers/block/loop.c
===================================================================
--- linux-2.6.orig/drivers/block/loop.c	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/drivers/block/loop.c	2009-03-31 11:44:12.000000000 +0200
@@ -721,10 +721,6 @@ static int loop_change_fd(struct loop_de
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 		goto out_putf;
 
-	/* new backing store needs to support loop (eg splice_read) */
-	if (!inode->i_fop->splice_read)
-		goto out_putf;
-
 	/* size of the new backing store needs to be the same */
 	if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 		goto out_putf;
@@ -800,12 +796,7 @@ static int loop_set_fd(struct loop_devic
 	error = -EINVAL;
 	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 		const struct address_space_operations *aops = mapping->a_ops;
-		/*
-		 * If we can't read - sorry. If we only can't write - well,
-		 * it's going to be read-only.
-		 */
-		if (!file->f_op->splice_read)
-			goto out_putf;
+
 		if (aops->write_begin)
 			lo_flags |= LO_FLAGS_USE_AOPS;
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
Index: linux-2.6/fs/coda/file.c
===================================================================
--- linux-2.6.orig/fs/coda/file.c	2009-03-31 11:43:55.000000000 +0200
+++ linux-2.6/fs/coda/file.c	2009-03-31 11:44:12.000000000 +0200
@@ -47,6 +47,8 @@ coda_file_splice_read(struct file *coda_
 		      struct pipe_inode_info *pipe, size_t count,
 		      unsigned int flags)
 {
+	ssize_t (*splice_read)(struct file *, loff_t *,
+			       struct pipe_inode_info *, size_t, unsigned int);
 	struct coda_file_info *cfi;
 	struct file *host_file;
 
@@ -54,10 +56,11 @@ coda_file_splice_read(struct file *coda_
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	if (!host_file->f_op || !host_file->f_op->splice_read)
-		return -EINVAL;
+	splice_read = host_file->f_op->splice_read;
+	if (!splice_read)
+		splice_read = default_file_splice_read;
 
-	return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
+	return splice_read(host_file, ppos, pipe, count, flags);
 }
 
 static ssize_t
Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c	2009-03-31 11:44:12.000000000 +0200
+++ linux-2.6/fs/fuse/file.c	2009-03-31 11:44:12.000000000 +0200
@@ -1944,7 +1944,6 @@ static const struct file_operations fuse
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
 	.poll		= fuse_file_poll,
-	/* no splice_read */
 };
 
 static const struct address_space_operations fuse_file_aops  = {
Index: linux-2.6/fs/read_write.c
===================================================================
--- linux-2.6.orig/fs/read_write.c	2009-03-31 11:44:10.000000000 +0200
+++ linux-2.6/fs/read_write.c	2009-03-31 11:45:48.000000000 +0200
@@ -749,12 +749,6 @@ static ssize_t do_sendfile(int out_fd, i
 		goto out;
 	if (!(in_file->f_mode & FMODE_READ))
 		goto fput_in;
-	retval = -EINVAL;
-	in_inode = in_file->f_path.dentry->d_inode;
-	if (!in_inode)
-		goto fput_in;
-	if (!in_file->f_op || !in_file->f_op->splice_read)
-		goto fput_in;
 	retval = -ESPIPE;
 	if (!ppos)
 		ppos = &in_file->f_pos;
@@ -778,6 +772,7 @@ static ssize_t do_sendfile(int out_fd, i
 	retval = -EINVAL;
 	if (!out_file->f_op || !out_file->f_op->sendpage)
 		goto fput_out;
+	in_inode = in_file->f_path.dentry->d_inode;
 	out_inode = out_file->f_path.dentry->d_inode;
 	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
 	if (retval < 0)

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-03-31 10:08 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-30 20:00 [patch 0/3] fuse: implement missing functionality on "direct_io" files Miklos Szeredi
2009-03-30 20:02 ` [patch 1/3] fuse: allow kernel to access " Miklos Szeredi
2009-03-30 20:03 ` [patch 2/3] fuse: allow private mappings of " Miklos Szeredi
2009-03-30 20:04 ` [patch 3/3] splice: implement default splice_read method Miklos Szeredi
2009-03-30 20:59   ` Brad Boyer
2009-03-31 10:07     ` Miklos Szeredi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).