All of lore.kernel.org
 help / color / mirror / Atom feed
From: Miklos Szeredi <miklos@szeredi.hu>
To: Maxim Patlasov <MPatlasov@parallels.com>
Cc: riel@redhat.com, dev@parallels.com, xemul@parallels.com,
	fuse-devel@lists.sourceforge.net, bfoster@redhat.com,
	linux-kernel@vger.kernel.org, jbottomley@parallels.com,
	linux-mm@kvack.org, viro@zeniv.linux.org.uk,
	linux-fsdevel@vger.kernel.org, akpm@linux-foundation.org,
	fengguang.wu@intel.com, devel@openvz.org, mgorman@suse.de
Subject: Re: [PATCH 10/16] fuse: Implement writepages callback
Date: Fri, 19 Jul 2013 18:50:37 +0200	[thread overview]
Message-ID: <20130719165037.GA18358@tucsk.piliscsaba.szeredi.hu> (raw)
In-Reply-To: <20130629174525.20175.18987.stgit@maximpc.sw.ru>

On Sat, Jun 29, 2013 at 09:45:29PM +0400, Maxim Patlasov wrote:
> From: Pavel Emelyanov <xemul@openvz.org>
> 
> The .writepages one is required to make each writeback request carry more than
> one page on it. The patch enables optimized behaviour unconditionally,
> i.e. mmap-ed writes will benefit from the patch even if fc->writeback_cache=0.

I rewrote this a bit, so we won't have to do the thing in two passes, which
makes it simpler and more robust.  Waiting for page writeback here is wrong
anyway, see comment above fuse_page_mkwrite().  BTW we had a race there because
fuse_page_mkwrite() didn't take the page lock.  I've also fixed that up and
pushed a series containing these patches, up to and including the one that
implements ->writepages(), to

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git writepages

It passed some trivial testing, but more is needed.

I'll get to the rest of the patches next week.

Thanks,
Miklos


Subject: fuse: Implement writepages callback
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 29 Jun 2013 21:45:29 +0400

The .writepages callback is required so that each writeback request can carry
more than one page. The patch enables the optimized behaviour unconditionally,
i.e. mmap-ed writes will benefit from the patch even if fc->writeback_cache=0.

[SzM: simplify, add comments]

Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c |  139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1579,6 +1579,144 @@ static int fuse_writepage(struct page *p
 	return err;
 }
 
+struct fuse_fill_wb_data {
+	struct fuse_req *req;
+	struct fuse_file *ff;
+	struct inode *inode;
+};
+
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+{
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	req->ff = fuse_file_get(data->ff);
+	spin_lock(&fc->lock);
+	list_add_tail(&req->list, &fi->queued_writes);
+	fuse_flush_writepages(inode);
+	spin_unlock(&fc->lock);
+}
+
+static int fuse_writepages_fill(struct page *page,
+		struct writeback_control *wbc, void *_data)
+{
+	struct fuse_fill_wb_data *data = _data;
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct page *tmp_page;
+	int err;
+
+	if (req) {
+		BUG_ON(!req->num_pages);
+		if (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+		    (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+		    req->pages[req->num_pages - 1]->index + 1 != page->index) {
+
+			fuse_writepages_send(data);
+			data->req = NULL;
+		}
+	}
+	err = -ENOMEM;
+	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (!tmp_page)
+		goto out_unlock;
+
+	/*
+	 * The page must not be redirtied until the writeout is completed
+	 * (i.e. userspace has sent a reply to the write request).  Otherwise
+	 * there could be more than one temporary page instance for each real
+	 * page.
+	 *
+	 * This is ensured by holding the page lock in page_mkwrite() while
+	 * checking fuse_page_is_writeback().  We already hold the page lock
+	 * since clear_page_dirty_for_io() and keep it held until we add the
+	 * request to the fi->writepages list and increment req->num_pages.
+	 * After this fuse_page_is_writeback() will indicate that the page is
+	 * under writeback, so we can release the page lock.
+	 */
+	if (data->req == NULL) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		err = -ENOMEM;
+		req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+		if (!req) {
+			__free_page(tmp_page);
+			goto out_unlock;
+		}
+
+		fuse_write_fill(req, data->ff, page_offset(page), 0);
+		req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+		req->in.argpages = 1;
+		req->background = 1;
+		req->num_pages = 0;
+		req->end = fuse_writepage_end;
+		req->inode = inode;
+
+		spin_lock(&fc->lock);
+		list_add(&req->writepages_entry, &fi->writepages);
+		spin_unlock(&fc->lock);
+
+		data->req = req;
+	}
+	set_page_writeback(page);
+
+	copy_highpage(tmp_page, page);
+	req->pages[req->num_pages] = tmp_page;
+	req->page_descs[req->num_pages].offset = 0;
+	req->page_descs[req->num_pages].length = PAGE_SIZE;
+
+	inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
+	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	end_page_writeback(page);
+
+	/*
+	 * Protected by fc->lock against concurrent access by
+	 * fuse_page_is_writeback().
+	 */
+	spin_lock(&fc->lock);
+	req->num_pages++;
+	spin_unlock(&fc->lock);
+
+	err = 0;
+out_unlock:
+	unlock_page(page);
+
+	return err;
+}
+
+static int fuse_writepages(struct address_space *mapping,
+			   struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_fill_wb_data data;
+	int err;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
+	data.req = NULL;
+	data.inode = inode;
+	data.ff = fuse_write_file(fc, get_fuse_inode(inode));
+	if (!data.ff)
+		goto out;
+
+	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
+	if (data.req) {
+		/* Ignore errors if we can write at least one page */
+		BUG_ON(!data.req->num_pages);
+		fuse_writepages_send(&data);
+		err = 0;
+	}
+	fuse_file_put(data.ff, false);
+out:
+	return err;
+}
+
 static int fuse_launder_page(struct page *page)
 {
 	int err = 0;
@@ -2589,6 +2727,7 @@ static const struct file_operations fuse
 static const struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.writepage	= fuse_writepage,
+	.writepages	= fuse_writepages,
 	.launder_page	= fuse_launder_page,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= __set_page_dirty_nobuffers,

WARNING: multiple messages have this Message-ID (diff)
From: Miklos Szeredi <miklos@szeredi.hu>
To: Maxim Patlasov <MPatlasov@parallels.com>
Cc: riel@redhat.com, dev@parallels.com, xemul@parallels.com,
	fuse-devel@lists.sourceforge.net, bfoster@redhat.com,
	linux-kernel@vger.kernel.org, jbottomley@parallels.com,
	linux-mm@kvack.org, viro@zeniv.linux.org.uk,
	linux-fsdevel@vger.kernel.org, akpm@linux-foundation.org,
	fengguang.wu@intel.com, devel@openvz.org, mgorman@suse.de
Subject: Re: [PATCH 10/16] fuse: Implement writepages callback
Date: Fri, 19 Jul 2013 18:50:37 +0200	[thread overview]
Message-ID: <20130719165037.GA18358@tucsk.piliscsaba.szeredi.hu> (raw)
In-Reply-To: <20130629174525.20175.18987.stgit@maximpc.sw.ru>

On Sat, Jun 29, 2013 at 09:45:29PM +0400, Maxim Patlasov wrote:
> From: Pavel Emelyanov <xemul@openvz.org>
> 
> The .writepages one is required to make each writeback request carry more than
> one page on it. The patch enables optimized behaviour unconditionally,
> i.e. mmap-ed writes will benefit from the patch even if fc->writeback_cache=0.

I rewrote this a bit, so we won't have to do the thing in two passes, which
makes it simpler and more robust.  Waiting for page writeback here is wrong
anyway, see comment above fuse_page_mkwrite().  BTW we had a race there because
fuse_page_mkwrite() didn't take the page lock.  I've also fixed that up and
pushed a series containing these patches, up to and including the one that
implements ->writepages(), to

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git writepages

It passed some trivial testing, but more is needed.

I'll get to the rest of the patches next week.

Thanks,
Miklos


Subject: fuse: Implement writepages callback
From: Pavel Emelyanov <xemul@openvz.org>
Date: Sat, 29 Jun 2013 21:45:29 +0400

The .writepages callback is required so that each writeback request can carry
more than one page. The patch enables the optimized behaviour unconditionally,
i.e. mmap-ed writes will benefit from the patch even if fc->writeback_cache=0.

[SzM: simplify, add comments]

Signed-off-by: Maxim Patlasov <MPatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c |  139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1579,6 +1579,144 @@ static int fuse_writepage(struct page *p
 	return err;
 }
 
+struct fuse_fill_wb_data {
+	struct fuse_req *req;
+	struct fuse_file *ff;
+	struct inode *inode;
+};
+
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
+{
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	req->ff = fuse_file_get(data->ff);
+	spin_lock(&fc->lock);
+	list_add_tail(&req->list, &fi->queued_writes);
+	fuse_flush_writepages(inode);
+	spin_unlock(&fc->lock);
+}
+
+static int fuse_writepages_fill(struct page *page,
+		struct writeback_control *wbc, void *_data)
+{
+	struct fuse_fill_wb_data *data = _data;
+	struct fuse_req *req = data->req;
+	struct inode *inode = data->inode;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct page *tmp_page;
+	int err;
+
+	if (req) {
+		BUG_ON(!req->num_pages);
+		if (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+		    (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+		    req->pages[req->num_pages - 1]->index + 1 != page->index) {
+
+			fuse_writepages_send(data);
+			data->req = NULL;
+		}
+	}
+	err = -ENOMEM;
+	tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (!tmp_page)
+		goto out_unlock;
+
+	/*
+	 * The page must not be redirtied until the writeout is completed
+	 * (i.e. userspace has sent a reply to the write request).  Otherwise
+	 * there could be more than one temporary page instance for each real
+	 * page.
+	 *
+	 * This is ensured by holding the page lock in page_mkwrite() while
+	 * checking fuse_page_is_writeback().  We already hold the page lock
+	 * since clear_page_dirty_for_io() and keep it held until we add the
+	 * request to the fi->writepages list and increment req->num_pages.
+	 * After this fuse_page_is_writeback() will indicate that the page is
+	 * under writeback, so we can release the page lock.
+	 */
+	if (data->req == NULL) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		err = -ENOMEM;
+		req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
+		if (!req) {
+			__free_page(tmp_page);
+			goto out_unlock;
+		}
+
+		fuse_write_fill(req, data->ff, page_offset(page), 0);
+		req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
+		req->in.argpages = 1;
+		req->background = 1;
+		req->num_pages = 0;
+		req->end = fuse_writepage_end;
+		req->inode = inode;
+
+		spin_lock(&fc->lock);
+		list_add(&req->writepages_entry, &fi->writepages);
+		spin_unlock(&fc->lock);
+
+		data->req = req;
+	}
+	set_page_writeback(page);
+
+	copy_highpage(tmp_page, page);
+	req->pages[req->num_pages] = tmp_page;
+	req->page_descs[req->num_pages].offset = 0;
+	req->page_descs[req->num_pages].length = PAGE_SIZE;
+
+	inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
+	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+	end_page_writeback(page);
+
+	/*
+	 * Protected by fc->lock against concurrent access by
+	 * fuse_page_is_writeback().
+	 */
+	spin_lock(&fc->lock);
+	req->num_pages++;
+	spin_unlock(&fc->lock);
+
+	err = 0;
+out_unlock:
+	unlock_page(page);
+
+	return err;
+}
+
+static int fuse_writepages(struct address_space *mapping,
+			   struct writeback_control *wbc)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_fill_wb_data data;
+	int err;
+
+	err = -EIO;
+	if (is_bad_inode(inode))
+		goto out;
+
+	data.req = NULL;
+	data.inode = inode;
+	data.ff = fuse_write_file(fc, get_fuse_inode(inode));
+	if (!data.ff)
+		goto out;
+
+	err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
+	if (data.req) {
+		/* Ignore errors if we can write at least one page */
+		BUG_ON(!data.req->num_pages);
+		fuse_writepages_send(&data);
+		err = 0;
+	}
+	fuse_file_put(data.ff, false);
+out:
+	return err;
+}
+
 static int fuse_launder_page(struct page *page)
 {
 	int err = 0;
@@ -2589,6 +2727,7 @@ static const struct file_operations fuse
 static const struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
 	.writepage	= fuse_writepage,
+	.writepages	= fuse_writepages,
 	.launder_page	= fuse_launder_page,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= __set_page_dirty_nobuffers,

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-07-19 16:50 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-06-29 17:41 [PATCH v5 00/16] fuse: An attempt to implement a write-back cache policy Maxim Patlasov
2013-06-29 17:41 ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 01/16] fuse: Linking file to inode helper Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 02/16] fuse: Getting file for writeback helper Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 03/16] fuse: Prepare to handle short reads Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 04/16] fuse: Prepare to handle multiple pages in writeback Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:42 ` [PATCH 05/16] fuse: Connection bit for enabling writeback Maxim Patlasov
2013-06-29 17:42   ` Maxim Patlasov
2013-06-29 17:44 ` [PATCH 06/16] fuse: Trust kernel i_size only - v4 Maxim Patlasov
2013-06-29 17:44   ` Maxim Patlasov
2013-06-29 17:44 ` [PATCH 07/16] fuse: Trust kernel i_mtime only Maxim Patlasov
2013-06-29 17:44   ` Maxim Patlasov
2013-07-11 16:14   ` [PATCH 07/16] fuse: Trust kernel i_mtime only -v2 Maxim Patlasov
2013-06-29 17:45 ` [PATCH 08/16] fuse: Flush files on wb close Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-07-11 16:18   ` [PATCH 08/16] fuse: Flush files on wb close -v2 Maxim Patlasov
2013-06-29 17:45 ` [PATCH 09/16] fuse: restructure fuse_readpage() Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:45 ` [PATCH 10/16] fuse: Implement writepages callback Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-07-19 16:50   ` Miklos Szeredi [this message]
2013-07-19 16:50     ` Miklos Szeredi
2013-08-02 15:40     ` Maxim Patlasov
2013-08-02 15:40       ` Maxim Patlasov
2013-08-02 15:40       ` Maxim Patlasov
2013-08-06 16:25       ` Miklos Szeredi
2013-08-06 16:25         ` Miklos Szeredi
2013-08-06 16:26         ` Eric Boxer
2013-08-09 15:02         ` Maxim Patlasov
2013-08-09 15:02           ` Maxim Patlasov
2013-08-09 15:02           ` Maxim Patlasov
2013-08-30 10:12           ` Miklos Szeredi
2013-08-30 10:12             ` Miklos Szeredi
2013-08-30 10:12             ` Miklos Szeredi
2013-08-30 14:50             ` Maxim Patlasov
2013-08-30 14:50               ` Maxim Patlasov
2013-08-30 14:50               ` Maxim Patlasov
2013-09-03 10:31               ` Miklos Szeredi
2013-09-03 10:31                 ` Miklos Szeredi
2013-09-03 10:31                 ` Miklos Szeredi
2013-09-03 16:02                 ` Maxim Patlasov
2013-09-03 16:02                   ` Maxim Patlasov
2013-09-03 16:02                   ` Maxim Patlasov
2013-06-29 17:45 ` [PATCH 11/16] fuse: Implement write_begin/write_end callbacks Maxim Patlasov
2013-06-29 17:45   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 12/16] fuse: fuse_writepage_locked() should wait on writeback Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 13/16] fuse: fuse_flush() " Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:46 ` [PATCH 14/16] fuse: Fix O_DIRECT operations vs cached writeback misorder - v2 Maxim Patlasov
2013-06-29 17:46   ` Maxim Patlasov
2013-06-29 17:47 ` [PATCH 15/16] fuse: Turn writeback cache on Maxim Patlasov
2013-06-29 17:47   ` Maxim Patlasov
2013-06-29 17:48 ` [PATCH 16/16] mm: strictlimit feature Maxim Patlasov
2013-06-29 17:48   ` Maxim Patlasov
2013-07-01 21:16   ` Andrew Morton
2013-07-01 21:16     ` Andrew Morton
2013-07-02  8:33     ` Maxim Patlasov
2013-07-02  8:33       ` Maxim Patlasov
2013-07-02  8:33       ` Maxim Patlasov
2013-07-02 17:44   ` [PATCH] mm: strictlimit feature -v2 Maxim Patlasov
2013-07-02 17:44     ` Maxim Patlasov
2013-07-02 19:38     ` Andrew Morton
2013-07-02 19:38       ` Andrew Morton
2013-07-03 11:01       ` Maxim Patlasov
2013-07-03 11:01         ` Maxim Patlasov
2013-07-03 11:01         ` Maxim Patlasov
2013-07-03 23:16         ` Jan Kara
2013-07-03 23:16           ` Jan Kara
2013-07-03 23:16           ` Jan Kara
2013-07-05 13:14           ` Maxim Patlasov
2013-07-05 13:14             ` Maxim Patlasov
2013-07-05 13:14             ` Maxim Patlasov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130719165037.GA18358@tucsk.piliscsaba.szeredi.hu \
    --to=miklos@szeredi.hu \
    --cc=MPatlasov@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfoster@redhat.com \
    --cc=dev@parallels.com \
    --cc=devel@openvz.org \
    --cc=fengguang.wu@intel.com \
    --cc=fuse-devel@lists.sourceforge.net \
    --cc=jbottomley@parallels.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=riel@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.