linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH]: v9fs: add readpage support
@ 2006-01-11  1:14 Eric Van Hensbergen
  2006-01-11 11:38 ` Andrew Morton
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Van Hensbergen @ 2006-01-11  1:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, v9fs-developer

Subject: [PATCH] v9fs: add readpage support

v9fs mmap support was originally removed from v9fs at Al Viro's request,
but recently there have been requests from folks who want readpage
functionality (primarily to enable execution of files mounted via 9P).
This patch adds readpage support (but not writepage which contained most of
the objectionable code).  It passes FSX (and other regressions) so it
should be relatively safe.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>

---

 fs/9p/Makefile    |    1 
 fs/9p/v9fs_vfs.h  |    1 
 fs/9p/vfs_addr.c  |  109 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/9p/vfs_file.c  |    3 +
 fs/9p/vfs_inode.c |    1 
 5 files changed, 115 insertions(+), 0 deletions(-)
 create mode 100644 fs/9p/vfs_addr.c

2d3190fce2418fc07d672ceb9d072b1cffb8be7f
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 3d02308..2f4ce43 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_9P_FS) := 9p2000.o
 	conv.o \
 	vfs_super.o \
 	vfs_inode.o \
+	vfs_addr.o \
 	vfs_file.o \
 	vfs_dir.o \
 	vfs_dentry.o \
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index c78502a..69cf290 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -39,6 +39,7 @@
  */
 
 extern struct file_system_type v9fs_fs_type;
+extern struct address_space_operations v9fs_addr_operations;
 extern struct file_operations v9fs_file_operations;
 extern struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
new file mode 100644
index 0000000..6ec1d64
--- /dev/null
+++ b/fs/9p/vfs_addr.c
@@ -0,0 +1,109 @@
+/*
+ *  linux/fs/9p/vfs_addr.c
+ *
+ * This file contians vfs address (mmap) ops for 9P2000. 
+ *
+ *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/pagemap.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+
+/**
+ * v9fs_vfs_readpage - read an entire page in from 9P
+ *
+ * @file: file being read 
+ * @page: structure to page
+ *
+ */
+
+static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+{  
+	char *buffer = NULL;
+	int retval = -EIO;
+	loff_t offset = page_offset(page);
+	int count = PAGE_CACHE_SIZE;
+	struct inode *inode = filp->f_dentry->d_inode;
+	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+	int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+	struct v9fs_fid *v9f = filp->private_data;
+	struct v9fs_fcall *fcall = NULL;
+	int fid = v9f->fid;
+	int total = 0;
+	int result = 0;
+
+	buffer = kmap(page);
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
+
+		if (result < 0) {
+			printk(KERN_ERR "v9fs_t_read returned %d\n",
+			       result);
+
+			kfree(fcall);
+			goto UnmapAndUnlock;
+		} else
+			offset += result;
+
+		memcpy(buffer, fcall->params.rread.data, result);
+
+		count -= result;
+		buffer += result;
+		total += result;
+
+		kfree(fcall);
+
+		if (result < rsize)
+			break;
+	} while (count);
+
+	memset(buffer, 0, count);
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	retval = 0;
+
+      UnmapAndUnlock:
+	kunmap(page);
+	unlock_page(page);
+	return retval;
+}
+
+struct address_space_operations v9fs_addr_operations = {
+      .readpage = v9fs_vfs_readpage,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 6852f0e..feddc5c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -289,6 +289,8 @@ v9fs_file_write(struct file *filp, const
 		total += result;
 	} while (count);
 
+	invalidate_inode_pages2(inode->i_mapping);
+
 	return total;
 }
 
@@ -299,4 +301,5 @@ struct file_operations v9fs_file_operati
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
+	.mmap = generic_file_mmap,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index a17b288..91f5524 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -177,6 +177,7 @@ struct inode *v9fs_get_inode(struct supe
 		inode->i_blocks = 0;
 		inode->i_rdev = 0;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_mapping->a_ops = &v9fs_addr_operations;
 
 		switch (mode & S_IFMT) {
 		case S_IFIFO:
-- 
1.1.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH]: v9fs: add readpage support
  2006-01-11  1:14 [PATCH]: v9fs: add readpage support Eric Van Hensbergen
@ 2006-01-11 11:38 ` Andrew Morton
  2006-01-11 13:05   ` Eric Van Hensbergen
  2006-01-13 21:45   ` Eric Van Hensbergen
  0 siblings, 2 replies; 6+ messages in thread
From: Andrew Morton @ 2006-01-11 11:38 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linux-kernel, v9fs-developer

ericvh@gmail.com (Eric Van Hensbergen) wrote:
>
> Subject: [PATCH] v9fs: add readpage support
> 
> v9fs mmap support was originally removed from v9fs at Al Viro's request,
> but recently there have been requests from folks who want readpage
> functionality (primarily to enable execution of files mounted via 9P).
> This patch adds readpage support (but not writepage which contained most of
> the objectionable code).  It passes FSX (and other regressions) so it
> should be relatively safe.
> 
> +
> +static int v9fs_vfs_readpage(struct file *filp, struct page *page)
> +{  
> +	char *buffer = NULL;
> +	int retval = -EIO;
> +	loff_t offset = page_offset(page);
> +	int count = PAGE_CACHE_SIZE;
> +	struct inode *inode = filp->f_dentry->d_inode;
> +	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
> +	int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
> +	struct v9fs_fid *v9f = filp->private_data;
> +	struct v9fs_fcall *fcall = NULL;
> +	int fid = v9f->fid;
> +	int total = 0;
> +	int result = 0;
> +
> +	buffer = kmap(page);
> +	do {
> +		if (count < rsize)
> +			rsize = count;
> +
> +		result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
> +
> +		if (result < 0) {
> +			printk(KERN_ERR "v9fs_t_read returned %d\n",
> +			       result);
> +
> +			kfree(fcall);
> +			goto UnmapAndUnlock;
> +		} else
> +			offset += result;
> +
> +		memcpy(buffer, fcall->params.rread.data, result);
> +
> +		count -= result;
> +		buffer += result;
> +		total += result;
> +
> +		kfree(fcall);

Minor thing: from my reading of v9fs_mux_rpc() there's potential for a
double-kfree here.  Either v9fs_mux_rpc() needs to be changed to
unambiguously zero out *rcall (or, better, v9fs_t_read does it) or you need
to zero fcall on each go around the loop.


> +		if (result < rsize)
> +			break;
> +	} while (count);
> +
> +	memset(buffer, 0, count);
> +	flush_dcache_page(page);
> +	SetPageUptodate(page);

if (result < rsize), is the page really up to date?

> +	retval = 0;
> +
> +      UnmapAndUnlock:
> +	kunmap(page);

eww, do you really indent labels like that?

> diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> index 6852f0e..feddc5c 100644
> --- a/fs/9p/vfs_file.c
> +++ b/fs/9p/vfs_file.c
> @@ -289,6 +289,8 @@ v9fs_file_write(struct file *filp, const
>  		total += result;
>  	} while (count);
>  
> +	invalidate_inode_pages2(inode->i_mapping);
> +
>  	return total;
>  }

That's a really scary function you have there.  Can you explain the
thinking behind its use here?  What are we trying to achieve?



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH]: v9fs: add readpage support
  2006-01-11 11:38 ` Andrew Morton
@ 2006-01-11 13:05   ` Eric Van Hensbergen
  2006-01-13 21:45   ` Eric Van Hensbergen
  1 sibling, 0 replies; 6+ messages in thread
From: Eric Van Hensbergen @ 2006-01-11 13:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, v9fs-developer

On 1/11/06, Andrew Morton <akpm@osdl.org> wrote:
> ericvh@gmail.com (Eric Van Hensbergen) wrote:
> >
> > Subject: [PATCH] v9fs: add readpage support
> >
> > v9fs mmap support was originally removed from v9fs at Al Viro's request,
> > but recently there have been requests from folks who want readpage
> > functionality (primarily to enable execution of files mounted via 9P).
> > This patch adds readpage support (but not writepage which contained most of
> > the objectionable code).  It passes FSX (and other regressions) so it
> > should be relatively safe.
> >
> > +
> > +static int v9fs_vfs_readpage(struct file *filp, struct page *page)
> > +{
> > +     char *buffer = NULL;
> > +     int retval = -EIO;
> > +     loff_t offset = page_offset(page);
> > +     int count = PAGE_CACHE_SIZE;
> > +     struct inode *inode = filp->f_dentry->d_inode;
> > +     struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
> > +     int rsize = v9ses->maxdata - V9FS_IOHDRSZ;
> > +     struct v9fs_fid *v9f = filp->private_data;
> > +     struct v9fs_fcall *fcall = NULL;
> > +     int fid = v9f->fid;
> > +     int total = 0;
> > +     int result = 0;
> > +
> > +     buffer = kmap(page);
> > +     do {
> > +             if (count < rsize)
> > +                     rsize = count;
> > +
> > +             result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
> > +
> > +             if (result < 0) {
> > +                     printk(KERN_ERR "v9fs_t_read returned %d\n",
> > +                            result);
> > +
> > +                     kfree(fcall);
> > +                     goto UnmapAndUnlock;
> > +             } else
> > +                     offset += result;
> > +
> > +             memcpy(buffer, fcall->params.rread.data, result);
> > +
> > +             count -= result;
> > +             buffer += result;
> > +             total += result;
> > +
> > +             kfree(fcall);
>
> Minor thing: from my reading of v9fs_mux_rpc() there's potential for a
> double-kfree here.  Either v9fs_mux_rpc() needs to be changed to
> unambiguously zero out *rcall (or, better, v9fs_t_read does it) or you need
> to zero fcall on each go around the loop.
>
>

Okay I'll take a look at this in the context of both the old mux and
the new mux code.

> > +             if (result < rsize)
> > +                     break;
> > +     } while (count);
> > +
> > +     memset(buffer, 0, count);
> > +     flush_dcache_page(page);
> > +     SetPageUptodate(page);
>
> if (result < rsize), is the page really up to date?
>

maybe?  Its been a while since I looked at this code, but I believe
the logic is that if you are approaching the end of the file you'll
get less than rsize bytes back and then you just fill in the rest of
the page with zeros.

> > +     retval = 0;
> > +
> > +      UnmapAndUnlock:
> > +     kunmap(page);
>
> eww, do you really indent labels like that?
>

No, something funky happened and I didn't proof the patch like I should have.

> > diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> > index 6852f0e..feddc5c 100644
> > --- a/fs/9p/vfs_file.c
> > +++ b/fs/9p/vfs_file.c
> > @@ -289,6 +289,8 @@ v9fs_file_write(struct file *filp, const
> >               total += result;
> >       } while (count);
> >
> > +     invalidate_inode_pages2(inode->i_mapping);
> > +
> >       return total;
> >  }
>
> That's a really scary function you have there.  Can you explain the
> thinking behind its use here?  What are we trying to achieve?
>

Its quite possible I've done the wrong thing here.  The intent is to
make sure that any stuff that might be in the page cache due to an
mmap is flushed when I do a write.  This approach is overkill, I
should probably just flush anything in the cache that the write
affects.

I'll take another look at the mux stuff and also see if I can come up
with a less brute-force approach to invalidating the page cache.

thanks for the comments.

      -eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH]: v9fs: add readpage support
  2006-01-11 11:38 ` Andrew Morton
  2006-01-11 13:05   ` Eric Van Hensbergen
@ 2006-01-13 21:45   ` Eric Van Hensbergen
  2006-01-13 22:14     ` Andrew Morton
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Van Hensbergen @ 2006-01-13 21:45 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, v9fs-developer, Linux FS Devel

On 1/11/06, Andrew Morton <akpm@osdl.org> wrote:
> ericvh@gmail.com (Eric Van Hensbergen) wrote:
> > +
> > +             result = v9fs_t_read(v9ses, fid, offset, rsize, &fcall);
> > +
> > +             if (result < 0) {
> > +                     printk(KERN_ERR "v9fs_t_read returned %d\n",
> > +                            result);
> > +
> > +                     kfree(fcall);
> > +                     goto UnmapAndUnlock;
> > +             } else
> > +                     offset += result;
> > +
> > +             memcpy(buffer, fcall->params.rread.data, result);
> > +
> > +             count -= result;
> > +             buffer += result;
> > +             total += result;
> > +
> > +             kfree(fcall);
>
> Minor thing: from my reading of v9fs_mux_rpc() there's potential for a
> double-kfree here.  Either v9fs_mux_rpc() needs to be changed to
> unambiguously zero out *rcall (or, better, v9fs_t_read does it) or you need
> to zero fcall on each go around the loop.
>
>

Okay, I had a chance to go back and look into this deeper when applied
on top of Lucho's new mux code.  It appears (to Lucho and I) that the
the v9fs_mux_rpc() code zero's out *rcall really early on, so I don't
see where this could lead to a double kfree.  If you still think we
are missing something, let me know and I'll have another look.

> > diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
> > index 6852f0e..feddc5c 100644
> > --- a/fs/9p/vfs_file.c
> > +++ b/fs/9p/vfs_file.c
> > @@ -289,6 +289,8 @@ v9fs_file_write(struct file *filp, const
> >               total += result;
> >       } while (count);
> >
> > +     invalidate_inode_pages2(inode->i_mapping);
> > +
> >       return total;
> >  }
>

I went looking for an example of how to do this better.  More or less,
v9fs reads and writes are similar to DirectIO since they don't go
through the page-cache.  So, I looked at what NFS does when it gets a
DirectIO write, and it looks (to me) like it does more or less the
same thing:
(from nfs_file_direct_write() in fs/nfs/direct.c)
        retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
        if (mapping->nrpages)
                invalidate_inode_pages2(mapping);

Now, that being said, it still seems to me to be a bit heavy weight --
do folks have a better pointer to code that I can use as an example of
how to do this more efficiently?

       -eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH]: v9fs: add readpage support
  2006-01-13 21:45   ` Eric Van Hensbergen
@ 2006-01-13 22:14     ` Andrew Morton
  2006-01-13 22:27       ` Eric Van Hensbergen
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2006-01-13 22:14 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linux-kernel, v9fs-developer, linux-fsdevel

Eric Van Hensbergen <ericvh@gmail.com> wrote:
>
> > > @@ -289,6 +289,8 @@ v9fs_file_write(struct file *filp, const
> > >               total += result;
> > >       } while (count);
> > >
> > > +     invalidate_inode_pages2(inode->i_mapping);
> > > +
> > >       return total;
> > >  }
> >
> 
> I went looking for an example of how to do this better.  More or less,
> v9fs reads and writes are similar to DirectIO since they don't go
> through the page-cache.

hm.  Why not?

>  So, I looked at what NFS does when it gets a
> DirectIO write, and it looks (to me) like it does more or less the
> same thing:
> (from nfs_file_direct_write() in fs/nfs/direct.c)
>         retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
>         if (mapping->nrpages)
>                 invalidate_inode_pages2(mapping);
> 
> Now, that being said, it still seems to me to be a bit heavy weight --
> do folks have a better pointer to code that I can use as an example of
> how to do this more efficiently?

Not really.  If that's what you need to do then that's the way to do it. 
We've had nasty races and other problems wrt invalidate_inode_pages2 and
pagefaults, so I suggest you test that mix carefully.

Have you tried fsx-linux?  It's really good for finding data integrity
bugs.  There's a copy in
http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz.

I'd suggest that you want the mapping->nrpages test - it'll be a useful
speedup in the common case.

Of course, someone could come in and add a page to pagecache via a
pagefault at any time after that test, but that's true of
invalidate_inode_pages2() in general.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH]: v9fs: add readpage support
  2006-01-13 22:14     ` Andrew Morton
@ 2006-01-13 22:27       ` Eric Van Hensbergen
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Van Hensbergen @ 2006-01-13 22:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, v9fs-developer, linux-fsdevel

On 1/13/06, Andrew Morton <akpm@osdl.org> wrote:
> Eric Van Hensbergen <ericvh@gmail.com> wrote:
> >
> > I went looking for an example of how to do this better.  More or less,
> > v9fs reads and writes are similar to DirectIO since they don't go
> > through the page-cache.
>
> hm.  Why not?
>

At the moment we'd rather not cache anything with v9fs as it hides
operations from the servers, and in the case of synthetic servers it
can be a bad thing.  There is a somewhat well-understood strategy for
how to do cacheing of static files in a sane manner under 9P, but we
were holding off on trying to move that into v9fs for the time being. 
The only reason we added the read-only mmap support back in was to
support users who were trying to access executables over 9P
connections.

> >
> > Now, that being said, it still seems to me to be a bit heavy weight --
> > do folks have a better pointer to code that I can use as an example of
> > how to do this more efficiently?
>
> Not really.  If that's what you need to do then that's the way to do it.
> We've had nasty races and other problems wrt invalidate_inode_pages2 and
> pagefaults, so I suggest you test that mix carefully.
>
> Have you tried fsx-linux?  It's really good for finding data integrity
> bugs.  There's a copy in
> http://www.zip.com.au/~akpm/linux/patches/stuff/ext3-tools.tar.gz.
>

We've been regression testing with vanilla fsx, I'll upgrade to
fsx-linux and make sure we are clean.

>
> I'd suggest that you want the mapping->nrpages test - it'll be a useful
> speedup in the common case.
>

Yeah, as I was tracing through the invalidate_inode_pages2() this
morning I realized this is probably a good idea.  Should have a new
patch to you by the end of the weekend.

          -eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2006-01-13 22:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-11  1:14 [PATCH]: v9fs: add readpage support Eric Van Hensbergen
2006-01-11 11:38 ` Andrew Morton
2006-01-11 13:05   ` Eric Van Hensbergen
2006-01-13 21:45   ` Eric Van Hensbergen
2006-01-13 22:14     ` Andrew Morton
2006-01-13 22:27       ` Eric Van Hensbergen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).